List of usage examples for the org.apache.commons.cli.PosixParser constructor PosixParser()
PosixParser
From source file:edu.msu.cme.rdp.readseq.utils.SequenceTrimmer.java
/**
 * Command-line entry point of the sequence trimmer.
 *
 * <p>Usage: {@code SequenceTrimmer [options] <trim start> <trim stop> <aligned file> ...}
 *
 * <p>For every input file a FASTA file named {@code trimmed_<input file name>} is written to
 * the output directory. Sequences whose name starts with {@code #} are passed through
 * {@code trimMetaSeq}; all other sequences are written only when {@code didSeqPass} accepts
 * their trim statistics. One statistics record per regular sequence goes to the stats
 * writer (a {@code NullWriter} sink unless {@code --stats} is given).
 *
 * <p>When {@code --ref-seq} is given, both trim points are translated from reference-sequence
 * coordinates to model coordinates via {@code translateCoord}.
 *
 * <p>On any option or argument error the usage text and the error message are printed and
 * the JVM exits with status 1. Previously execution fell through and died with a
 * {@link NullPointerException} while printing the (unset) input file list.
 *
 * @param args command-line options followed by trim start, trim stop and one or more files
 * @throws IOException if reading a sequence file or writing an output file fails
 */
public static void main(String[] args) throws IOException {
    Options options = new Options();
    options.addOption("r", "ref-seq", true,
            "Trim points are given as positions in a reference sequence from this file");
    options.addOption("i", "inclusive", false, "Trim points are inclusive");
    options.addOption("l", "length", true, "Minimum length of sequence after trimming");
    options.addOption("f", "filled-ratio", true,
            "Minimum ratio of filled model positions of sequence after trimming");
    options.addOption("o", "out", true, "Write sequences to directory (default=cwd)");
    options.addOption("s", "stats", true, "Write stats to file");

    PrintWriter statsOut = new PrintWriter(new NullWriter());
    // NOTE(review): "inclusive" is parsed and echoed below but is not passed to any
    // trimming helper in this method.
    boolean inclusive = false;
    int minLength = 0;
    // No command-line switch sets the next three thresholds here; they are always 0.
    int minTrimmedLength = 0;
    int maxNs = 0;
    int maxTrimNs = 0;
    int trimStart = 0;
    int trimStop = 0;
    Sequence refSeq = null;
    float minFilledRatio = 0;
    // Computed when a reference sequence is given; not read again in this method.
    int expectedModelPos = -1;
    String[] inputFiles = null;
    File outdir = new File(".");

    try {
        CommandLine line = new PosixParser().parse(options, args);

        if (line.hasOption("ref-seq")) {
            refSeq = readRefSeq(new File(line.getOptionValue("ref-seq")));
        }
        if (line.hasOption("inclusive")) {
            inclusive = true;
        }
        if (line.hasOption("length")) {
            minLength = Integer.parseInt(line.getOptionValue("length"));
        }
        if (line.hasOption("filled-ratio")) {
            minFilledRatio = Float.parseFloat(line.getOptionValue("filled-ratio"));
        }
        if (line.hasOption("out")) {
            outdir = new File(line.getOptionValue("out"));
            if (!outdir.isDirectory()) {
                // Fall back to the parent; a bare relative name has no parent, in which
                // case the current directory is used (that is also where the files ended
                // up before, but the message used to say "null").
                File parent = outdir.getParentFile();
                outdir = (parent != null) ? parent : new File(".");
                System.err.println("Output option is not a directory, using " + outdir + " instead");
            }
        }
        if (line.hasOption("stats")) {
            // Release the placeholder sink before replacing it with the real file.
            statsOut.close();
            statsOut = new PrintWriter(line.getOptionValue("stats"));
        }

        args = line.getArgs();
        if (args.length < 3) {
            throw new IllegalArgumentException("Unexpected number of arguments");
        }

        trimStart = Integer.parseInt(args[0]);
        trimStop = Integer.parseInt(args[1]);
        inputFiles = Arrays.copyOfRange(args, 2, args.length);

        if (refSeq != null) {
            expectedModelPos = SeqUtils.getMaskedBySeqString(refSeq.getSeqString()).length();
            trimStart = translateCoord(trimStart, refSeq, CoordType.seq, CoordType.model);
            trimStop = translateCoord(trimStop, refSeq, CoordType.seq, CoordType.model);
        }
    } catch (Exception e) {
        new HelpFormatter().printHelp("SequenceTrimmer <trim start> <trim stop> <aligned file> ...", options);
        System.err.println("Error: " + e.getMessage());
        statsOut.close();
        // Nothing below can work without valid arguments; stop with a failure status.
        System.exit(1);
        return;
    }

    System.err.println("Starting sequence trimmer");
    System.err.println("* Input files: " + Arrays.asList(inputFiles));
    System.err.println("* Minimum Length: " + minLength);
    System.err.println("* Trim point inclusive?: " + inclusive);
    System.err.println("* Trim points: " + trimStart + "-" + trimStop);
    System.err.println("* Min filled ratio: " + minFilledRatio);
    System.err.println("* refSeq: "
            + ((refSeq == null) ? "model" : refSeq.getSeqName() + " " + refSeq.getDesc()));

    try {
        writeStatsHeader(statsOut);

        for (String infile : inputFiles) {
            File in = new File(infile);
            SeqReader reader = new SequenceReader(in);
            try {
                FastaWriter seqWriter = new FastaWriter(new File(outdir, "trimmed_" + in.getName()));
                try {
                    Sequence seq;
                    while ((seq = reader.readNextSequence()) != null) {
                        if (seq.getSeqName().startsWith("#")) {
                            // Meta sequences are trimmed but neither filtered nor counted.
                            seqWriter.writeSeq(seq.getSeqName(), "",
                                    trimMetaSeq(seq.getSeqString(), trimStart, trimStop));
                            continue;
                        }

                        TrimStats stats = getStats(seq, trimStart, trimStop);
                        boolean passed = didSeqPass(stats, minLength, minTrimmedLength, maxNs, maxTrimNs,
                                minFilledRatio);
                        writeStats(statsOut, seq.getSeqName(), stats, passed);
                        if (passed) {
                            seqWriter.writeSeq(seq.getSeqName(), seq.getDesc(), new String(stats.trimmedBases));
                        }
                    }
                } finally {
                    seqWriter.close();
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Always flush and close the stats file, even when a sequence file is unreadable.
        statsOut.close();
    }
}
From source file:com.genentech.chemistry.openEye.apps.SdfRMSDNNFinder.java
public static void main(String[] args) throws IOException { Options options = new Options(); Option opt = new Option(OPT_INFILE, true, "input file oe-supported Use .sdf|.smi to specify the file type."); opt.setRequired(true);/*from www . j av a2 s .com*/ options.addOption(opt); opt = new Option(OPT_OUTFILE, true, "output file oe-supported. Use .sdf|.smi to specify the file type."); opt.setRequired(true); options.addOption(opt); opt = new Option(OPT_REFFILE, true, "Reference file containing poses from reference docking run. " + "If " + OPT_REFFILE + " not specified, program uses the input file " + "(internal NN analysis)"); options.addOption(opt); opt = new Option(OPT_MOLIdTag, true, "Name of the field containing the molecule identifier. " + " Assumption: Ref file uses the same field name."); opt.setRequired(true); options.addOption(opt); opt = new Option(OPT_DONotOpt, false, "If specified the RMSD is computed without trying to optimize the alignment."); options.addOption(opt); opt = new Option(OPT_MIRROR, false, "For non-chiral molecules also try mirror image"); options.addOption(opt); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (Exception e) { System.err.println(e.getMessage()); exitWithHelp(options); } args = cmd.getArgs(); if (args.length != 0) { ; //error message } String inFile = cmd.getOptionValue(OPT_INFILE); String outFile = cmd.getOptionValue(OPT_OUTFILE); String refFile = cmd.getOptionValue(OPT_REFFILE); boolean noRefFile = false; if (refFile == null) { if (inFile.startsWith(".")) inFile = writeRefMolPosesToTempFile(inFile); refFile = inFile; noRefFile = true; } String molIdTag = cmd.getOptionValue(OPT_MOLIdTag); boolean doOptimize = !cmd.hasOption(OPT_DONotOpt); boolean doMirror = cmd.hasOption(OPT_MIRROR); SdfRMSDNNFinder nnFinder = new SdfRMSDNNFinder(refFile, outFile, molIdTag, doMirror, doOptimize, noRefFile); nnFinder.run(inFile); nnFinder.close(); }
From source file:com.cloudera.recordbreaker.schemadict.SchemaSuggest.java
/** * SchemaSuggest takes an avro file where schema elements may be anonymous. It then attempts to * compute good labels for the anonymous elts. By default, this tool simply prints out the * suggested labels, if any. The user may include a flag to rewrite the input data using * the new labels.//from ww w .j a va2 s.c o m * * schemaSuggest avroFile * */ public static void main(String argv[]) throws IOException { CommandLine cmd = null; boolean debug = false; Options options = new Options(); options.addOption("?", false, "Help for command-line"); options.addOption("f", true, "Accept suggestions and rewrite input to a new Avro file"); options.addOption("d", false, "Debug mode"); options.addOption("k", true, "How many matches to emit."); try { CommandLineParser parser = new PosixParser(); cmd = parser.parse(options, argv); } catch (ParseException e) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaSuggest", options, true); System.err.println("Required inputs: <schemadictionary> <anonymousAvro>"); System.exit(-1); } if (cmd.hasOption("?")) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaSuggest", options, true); System.err.println("Required inputs: <schemadictionary> <anonymousAvro>"); System.exit(0); } if (cmd.hasOption("d")) { debug = true; } int k = 1; if (cmd.hasOption("k")) { try { k = Integer.parseInt(cmd.getOptionValue("k")); } catch (NumberFormatException nfe) { } } String[] argArray = cmd.getArgs(); if (argArray.length < 2) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaSuggest", options, true); System.err.println("Required inputs: <schemadictionary> <anonymousAvro>"); System.exit(0); } File dataDir = new File(argArray[0]).getCanonicalFile(); File inputData = new File(argArray[1]).getCanonicalFile(); SchemaSuggest ss = new SchemaSuggest(dataDir); List<DictionaryMapping> mappings = ss.inferSchemaMapping(inputData, k); if (!cmd.hasOption("f")) { System.out.println("Ranking of closest known data types, with 
match-distance (smaller is better):"); int counter = 1; for (DictionaryMapping mapping : mappings) { SchemaMapping sm = mapping.getMapping(); List<SchemaMappingOp> bestOps = sm.getMapping(); System.err.println(); System.err.println(); System.err.println("-------------------------------------------------------------"); System.out.println( counter + ". '" + mapping.getDictEntry().getInfo() + "', with distance: " + sm.getDist()); List<SchemaMappingOp> renames = new ArrayList<SchemaMappingOp>(); List<SchemaMappingOp> extraInTarget = new ArrayList<SchemaMappingOp>(); List<SchemaMappingOp> extraInSource = new ArrayList<SchemaMappingOp>(); for (SchemaMappingOp op : bestOps) { if (op.opcode == SchemaMappingOp.CREATE_OP) { extraInTarget.add(op); } else if (op.opcode == SchemaMappingOp.DELETE_OP) { if (op.getS1DatasetLabel().compareTo("input") == 0) { extraInSource.add(op); } else { extraInTarget.add(op); } } else if (op.opcode == SchemaMappingOp.TRANSFORM_OP) { renames.add(op); } } System.err.println(); System.err.println(" DISCOVERED LABELS"); int counterIn = 1; if (renames.size() == 0) { System.err.println(" (None)"); } else { for (SchemaMappingOp op : renames) { System.err.println(" " + counterIn + ". " + "In '" + op.getS1DatasetLabel() + "', label '" + op.getS1FieldLabel() + "' AS " + op.getS2FieldLabel()); if (debug) { if (op.getS1DocStr() != null && op.getS1DocStr().length() > 0) { System.err.println( " '" + op.getS1DocStr() + "' ==> '" + op.getS2DocStr() + "'"); } } counterIn++; } } System.err.println(); System.err.println(" UNMATCHED ITEMS IN TARGET DATA TYPE"); counterIn = 1; if (extraInTarget.size() == 0) { System.err.println(" (None)"); } else { for (SchemaMappingOp op : extraInTarget) { System.err.println(" " + counterIn + ". 
" + op.getS1FieldLabel()); if (debug) { if (op.getS1DocStr() != null && op.getS1DocStr().length() > 0) { System.err.println(" " + op.getS1DocStr()); } } counterIn++; } } System.err.println(); System.err.println(" UNMATCHED ITEMS IN SOURCE DATA"); counterIn = 1; if (extraInSource.size() == 0) { System.err.println(" (None)"); } else { for (SchemaMappingOp op : extraInSource) { System.err.println(" " + counterIn + ". " + op.getS1FieldLabel()); if (debug) { if (op.getS1DocStr() != null && op.getS1DocStr().length() > 0) { System.err.println(" " + op.getS1DocStr()); } } counterIn++; } } counter++; } } }
From source file:de.unileipzig.ub.indexer.App.java
public static void main(String[] args) throws IOException { // create Options object Options options = new Options(); options.addOption("h", "help", false, "display this help"); options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed"); options.addOption("i", "index", true, "the name of the target index"); options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)"); options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)"); options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)"); options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)"); options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)"); options.addOption("v", "verbose", false, "show processing speed while indexing"); options.addOption("s", "status", false, "only show status of index for file"); options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go"); options.addOption("e", "debug", false, "set logging level to debug"); options.addOption("l", "logfile", true, "logfile - in not specified only log to stdout"); options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)"); options.addOption("z", "latest-flag-on", true, "enable latest flag according to field (within content, e.g. 001)"); options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies"); CommandLineParser parser = new PosixParser(); CommandLine cmd = null;/* www .j a va2 s. 
co m*/ try { cmd = parser.parse(options, args); } catch (ParseException ex) { logger.error(ex); System.exit(1); } // setup logging Properties systemProperties = System.getProperties(); systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger"); System.setProperties(systemProperties); Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR); Properties props = new Properties(); props.load(props.getClass().getResourceAsStream("/log4j.properties")); if (cmd.hasOption("debug")) { props.setProperty("log4j.logger.de.unileipzig", "DEBUG"); } if (cmd.hasOption("logfile")) { props.setProperty("log4j.rootLogger", "INFO, stdout, F"); props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender"); props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile")); props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n"); } PropertyConfigurator.configure(props); InetAddress addr = InetAddress.getLocalHost(); String memcachedHostAndPort = addr.getHostAddress() + ":11211"; if (cmd.hasOption("m")) { memcachedHostAndPort = cmd.getOptionValue("m"); } // setup caching try { if (memcachedClient == null) { memcachedClient = new MemcachedClient( new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(), AddrUtil.getAddresses("0.0.0.0:11211")); try { // give client and server 500ms Thread.sleep(300); } catch (InterruptedException ex) { } Collection availableServers = memcachedClient.getAvailableServers(); logger.info(availableServers); if (availableServers.size() == 0) { logger.info("no memcached servers found"); memcachedClient.shutdown(); memcachedClient = null; } else { logger.info(availableServers.size() + " memcached server(s) detected, fine."); } } } catch (IOException ex) { logger.warn("couldn't create a connection, bailing out: " + ex.getMessage()); } // process options if (cmd.hasOption("h")) { 
HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("indexer", options, true); quit(0); } boolean verbose = false; if (cmd.hasOption("verbose")) { verbose = true; } // ES options String[] hosts = new String[] { "0.0.0.0" }; int port = 9300; String clusterName = "elasticsearch_mdma"; int bulkSize = 3000; if (cmd.hasOption("host")) { hosts = cmd.getOptionValues("host"); } if (cmd.hasOption("port")) { port = Integer.parseInt(cmd.getOptionValue("port")); } if (cmd.hasOption("cluster")) { clusterName = cmd.getOptionValue("cluster"); } if (cmd.hasOption("bulksize")) { bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize")); if (bulkSize < 1 || bulkSize > 100000) { logger.error("bulksize must be between 1 and 100,000"); quit(1); } } // ES Client final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "elasticsearch_mdma") .build(); final TransportClient client = new TransportClient(settings); for (String host : hosts) { client.addTransportAddress(new InetSocketTransportAddress(host, port)); } if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) { final String filename = cmd.getOptionValue("filename"); final File _file = new File(filename); if (_file.length() == 0) { logger.info(_file.getAbsolutePath() + " is empty, skipping"); quit(0); // file is empty } // for flat mode: leave a stampfile beside the json to // indicate previous successful processing File directory = new File(filename).getParentFile(); File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed"); long start = System.currentTimeMillis(); long lineCount = 0; final String indexName = cmd.getOptionValue("index"); final String docType = cmd.getOptionValue("doctype"); BulkRequestBuilder bulkRequest = client.prepareBulk(); try { if (cmd.hasOption("flat")) { // flat mode // ......... 
if (stampfile.exists()) { logger.info("SKIPPING, since it seems this file has already " + "been imported (found: " + stampfile.getAbsolutePath() + ")"); quit(0); } } else { final String srcSHA1 = extractSrcSHA1(filename); logger.debug(filename + " srcsha1: " + srcSHA1); long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1); logger.debug(filename + " indexed: " + docsInIndex); long docsInFile = getLineCount(filename); logger.debug(filename + " lines: " + docsInFile); // in non-flat-mode, indexing would take care // of inconsistencies if (docsInIndex == docsInFile) { logger.info("UP-TO DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")"); client.close(); quit(0); } if (docsInIndex > 0) { logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex + " lines:" + docsInFile); if (!cmd.hasOption("r")) { logger.warn( "Please re-run indexer with --repair flag or delete residues first with: $ curl -XDELETE " + hosts[0] + ":9200/" + indexName + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'"); client.close(); quit(1); } else { logger.info("Attempting to clear residues..."); // attempt to repair once DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName) .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet(); Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator(); long deletions = 0; while (it.hasNext()) { IndexDeleteByQueryResponse response = it.next(); deletions += 1; } logger.info("Deleted residues of " + filename); logger.info("Refreshing [" + indexName + "]"); RefreshResponse refreshResponse = client.admin().indices() .refresh(new RefreshRequest(indexName)).actionGet(); long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1); logger.info(indexedAfterDelete + " docs remained"); if (indexedAfterDelete > 0) { logger.warn("Not all residues cleaned. 
Try to fix this manually: $ curl -XDELETE " + hosts[0] + ":9200/" + indexName + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'"); quit(1); } else { logger.info("Residues are gone. Now trying to reindex: " + filename); } } } } logger.info("INDEXING-REQUIRED: " + filename); if (cmd.hasOption("status")) { quit(0); } HashSet idsInBatch = new HashSet(); String idField = null; if (cmd.hasOption("z")) { idField = cmd.getOptionValue("z"); } final FileReader fr = new FileReader(filename); final BufferedReader br = new BufferedReader(fr); String line; // one line is one document while ((line = br.readLine()) != null) { // "Latest-Flag" machine // This gets obsolete with a "flat" index if (cmd.hasOption("z")) { // flag that indicates, whether the document // about to be indexed will be the latest boolean willBeLatest = true; // check if there is a previous (lower meta.timestamp) document with // the same identifier (whatever that may be - queried under "content") final String contentIdentifier = getContentIdentifier(line, idField); idsInBatch.add(contentIdentifier); // assumed in meta.timestamp final Long timestamp = Long.parseLong(getTimestamp(line)); logger.debug("Checking whether record is latest (line: " + lineCount + ")"); logger.debug(contentIdentifier + ", " + timestamp); // get all docs, which match the contentIdentifier // by filter, which doesn't score final TermFilterBuilder idFilter = new TermFilterBuilder("content." 
+ idField, contentIdentifier); final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType); final AndFilterBuilder afb = new AndFilterBuilder(); afb.add(idFilter).add(kindFilter); final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb); final SearchResponse searchResponse = client.prepareSearch(indexName) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0) .setSize(1200) // 3 years and 105 days assuming daily updates at the most .setExplain(false).execute().actionGet(); final SearchHits searchHits = searchResponse.getHits(); logger.debug("docs with this id in the index: " + searchHits.getTotalHits()); for (final SearchHit hit : searchHits.getHits()) { final String docId = hit.id(); final Map<String, Object> source = hit.sourceAsMap(); final Map meta = (Map) source.get("meta"); final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString()); // if the indexed doc timestamp is lower the the current one, // remove any latest flag if (timestamp >= docTimestamp) { source.remove("latest"); final ObjectMapper mapper = new ObjectMapper(); // put the updated doc back // IndexResponse response = client.prepareIndex(indexName, docType).setCreate(false).setId(docId) .setSource(mapper.writeValueAsBytes(source)) .execute(new ActionListener<IndexResponse>() { public void onResponse(IndexResponse rspns) { logger.debug("Removed latest flag from " + contentIdentifier + ", " + docTimestamp + ", " + hit.id() + " since (" + timestamp + " > " + docTimestamp + ")"); } public void onFailure(Throwable thrwbl) { logger.error("Could not remove flag from " + hit.id() + ", " + contentIdentifier); } }); // .execute() //.actionGet(); } else { logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")"); willBeLatest = false; } } if (willBeLatest) { line = setLatestFlag(line); logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp); } // end of latest-flag machine // beware - this will be correct as long 
as there // are no dups within one bulk! } bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line)); lineCount++; logger.debug("Added line " + lineCount + " to BULK"); logger.debug(line); if (lineCount % bulkSize == 0) { if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) { logger.error( "This batch has duplications in the ID. That's not bad for the index, just makes the latest flag fuzzy"); logger.error( "Bulk size was: " + bulkSize + ", but " + idsInBatch.size() + " IDs (only)"); } idsInBatch.clear(); logger.debug("Issuing BULK request"); final long actionCount = bulkRequest.numberOfActions(); final BulkResponse bulkResponse = bulkRequest.execute().actionGet(); final long tookInMillis = bulkResponse.getTookInMillis(); if (bulkResponse.hasFailures()) { logger.fatal("FAILED, bulk not indexed. exiting now."); Iterator<BulkItemResponse> it = bulkResponse.iterator(); while (it.hasNext()) { BulkItemResponse bir = it.next(); if (bir.isFailed()) { Failure failure = bir.getFailure(); logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage() + ", type: " + failure.getType() + ", index: " + failure.getIndex()); } } quit(1); } else { if (verbose) { final double elapsed = System.currentTimeMillis() - start; final double speed = (lineCount / elapsed * 1000); logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)"); } } bulkRequest = client.prepareBulk(); } } // handle the remaining items final long actionCount = bulkRequest.numberOfActions(); if (actionCount > 0) { final BulkResponse bulkResponse = bulkRequest.execute().actionGet(); final long tookInMillis = bulkResponse.getTookInMillis(); if (bulkResponse.hasFailures()) { logger.fatal("FAILED, bulk not indexed. 
exiting now."); Iterator<BulkItemResponse> it = bulkResponse.iterator(); while (it.hasNext()) { BulkItemResponse bir = it.next(); if (bir.isFailed()) { Failure failure = bir.getFailure(); logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage() + ", type: " + failure.getType() + ", index: " + failure.getIndex()); } } quit(1); } else { // trigger update now RefreshResponse refreshResponse = client.admin().indices() .refresh(new RefreshRequest(indexName)).actionGet(); if (verbose) { final double elapsed = System.currentTimeMillis() - start; final double speed = (lineCount / elapsed * 1000); logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)"); } } } br.close(); client.close(); final double elapsed = (System.currentTimeMillis() - start) / 1000; final double speed = (lineCount / elapsed); logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: " + String.format("%.2f", speed) + "r/s)"); if (cmd.hasOption("flat")) { try { FileUtils.touch(stampfile); } catch (IOException ioe) { logger.warn(".indexed files not created. Will reindex everything everytime."); } } } catch (IOException e) { client.close(); logger.error(e); quit(1); } finally { client.close(); } } quit(0); }
From source file:edu.msu.cme.rdp.seqmatch.cli.SeqmatchCheckRevSeq.java
public static void main(String[] args) throws Exception { String trainingFile = null;// w ww. j a v a 2 s . com String queryFile = null; String outputFile = null; PrintWriter revOutputWriter = new PrintWriter(System.out); PrintStream correctedQueryOut = System.out; String traineeDesc = null; int numOfResults = 20; boolean checkReverse = false; float diffScoreCutoff = CheckReverseSeq.DIFF_SCORE_CUTOFF; String format = "txt"; // default try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("c")) { checkReverse = true; } if (line.hasOption("t")) { trainingFile = line.getOptionValue("t"); } else { throw new Exception("training file must be specified"); } if (line.hasOption("q")) { queryFile = line.getOptionValue("q"); } else { throw new Exception("query file must be specified"); } if (line.hasOption("o")) { outputFile = line.getOptionValue("o"); } else { throw new Exception("output file must be specified"); } if (line.hasOption("r")) { revOutputWriter = new PrintWriter(line.getOptionValue("r")); } if (line.hasOption("s")) { correctedQueryOut = new PrintStream(line.getOptionValue("s")); } if (line.hasOption("d")) { diffScoreCutoff = Float.parseFloat(line.getOptionValue("d")); } if (line.hasOption("h")) { traineeDesc = line.getOptionValue("h"); } if (line.hasOption("n")) { numOfResults = Integer.parseInt(line.getOptionValue("n")); } if (line.hasOption("f")) { format = line.getOptionValue("f"); if (!format.equals("tab") && !format.equals("dbformat") && !format.equals("xml")) { throw new IllegalArgumentException("Only dbformat, tab or xml format available"); } } } catch (Exception e) { System.out.println("Command Error: " + e.getMessage()); new HelpFormatter().printHelp(120, "SeqmatchCheckRevSeq", "", options, "", true); return; } SeqmatchCheckRevSeq theObj = new SeqmatchCheckRevSeq(); if (!checkReverse) { theObj.doUserLibMatch(queryFile, trainingFile, outputFile, numOfResults, format, traineeDesc); } else { theObj.checkRevSeq(queryFile, 
trainingFile, outputFile, revOutputWriter, correctedQueryOut, diffScoreCutoff, format, traineeDesc); } }
From source file:at.ac.tuwien.inso.subcat.postprocessor.PostProcessor.java
public static void main(String[] args) { Map<String, PostProcessorTask> steps = new HashMap<String, PostProcessorTask>(); PostProcessorTask _step = new ClassificationTask(); steps.put(_step.getName(), _step);//w w w .jav a 2 s . c o m CommentAnalyserTask commentAnalysisStep = new CommentAnalyserTask(); steps.put(commentAnalysisStep.getName(), commentAnalysisStep); AccountInterlinkingTask interlinkingTask = new AccountInterlinkingTask(); steps.put(interlinkingTask.getName(), interlinkingTask); _step = new CommitBugInterlinkingTask(); steps.put(_step.getName(), _step); Options options = new Options(); options.addOption("h", "help", false, "Show this options"); options.addOption("d", "db", true, "The database to process (required)"); options.addOption("v", "verbose", false, "Show details"); options.addOption("p", "project", true, "The project ID to process"); options.addOption("P", "list-projects", false, "List all registered projects"); options.addOption("S", "list-processor-steps", false, "List all registered processor steps"); options.addOption("s", "processor-step", true, "A processor step name"); options.addOption("c", "commit-dictionary", true, "Path to a classification dictionary for commit message classification"); options.addOption("b", "bug-dictionary", true, "Path to a classification dictionary for bug classification"); options.addOption("m", "smart-matching", true, "Smart user matching configuration. 
Syntax: <method>:<distance>"); options.addOption("M", "list-matching-methods", false, "List smart matching methods"); final Reporter reporter = new Reporter(true); reporter.startTimer(); Settings settings = new Settings(); ModelPool pool = null; boolean printTraces = false; CommandLineParser parser = new PosixParser(); try { CommandLine cmd = parser.parse(options, args); printTraces = cmd.hasOption("verbose"); if (cmd.hasOption("help")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("postprocessor", options); return; } if (cmd.hasOption("list-processor-steps")) { for (String proj : steps.keySet()) { System.out.println(" " + proj); } return; } if (cmd.hasOption("list-matching-methods")) { for (String method : HashFunc.getHashFuncNames()) { System.out.println(" " + method); } return; } if (cmd.hasOption("db") == false) { reporter.error("post-processor", "Option --db is required"); reporter.printSummary(); return; } File dbf = new File(cmd.getOptionValue("db")); if (dbf.exists() == false || dbf.isFile() == false) { reporter.error("post-processor", "Invalid database file path"); reporter.printSummary(); return; } pool = new ModelPool(cmd.getOptionValue("db"), 2); if (cmd.hasOption("list-projects")) { Model model = pool.getModel(); for (Project proj : model.getProjects()) { System.out.println(" " + proj.getId() + ": " + proj.getDate()); } model.close(); return; } Integer projId = null; if (cmd.hasOption("project") == false) { reporter.error("post-processor", "Option --project is required"); reporter.printSummary(); return; } else { try { projId = Integer.parseInt(cmd.getOptionValue("project")); } catch (NumberFormatException e) { reporter.error("post-processor", "Invalid project ID"); reporter.printSummary(); return; } } Model model = pool.getModel(); Project project = model.getProject(projId); model.close(); if (project == null) { reporter.error("post-processor", "Invalid project ID"); reporter.printSummary(); return; } if 
(cmd.hasOption("bug-dictionary")) { DictionaryParser dp = new DictionaryParser(); for (String path : cmd.getOptionValues("bug-dictionary")) { try { Dictionary dict = dp.parseFile(path); settings.bugDictionaries.add(dict); } catch (FileNotFoundException e) { reporter.error("post-processor", "File not found: " + path + ": " + e.getMessage()); reporter.printSummary(); return; } catch (XmlReaderException e) { reporter.error("post-processor", "XML Error: " + path + ": " + e.getMessage()); reporter.printSummary(); return; } } } if (cmd.hasOption("commit-dictionary")) { DictionaryParser dp = new DictionaryParser(); for (String path : cmd.getOptionValues("commit-dictionary")) { try { Dictionary dict = dp.parseFile(path); settings.srcDictionaries.add(dict); } catch (FileNotFoundException e) { reporter.error("post-processor", "File not found: " + path + ": " + e.getMessage()); reporter.printSummary(); return; } catch (XmlReaderException e) { reporter.error("post-processor", "XML Error: " + path + ": " + e.getMessage()); reporter.printSummary(); return; } } } if (cmd.hasOption("smart-matching")) { String str = cmd.getOptionValue("smart-matching"); String[] parts = str.split(":"); if (parts.length != 2) { reporter.error("post-processor", "Unexpected smart-matching format"); reporter.printSummary(); return; } HashFunc func = HashFunc.getHashFunc(parts[0]); if (func == null) { reporter.error("post-processor", "Unknown smart matching hash function"); reporter.printSummary(); return; } int dist = -1; try { dist = Integer.parseInt(parts[1]); } catch (NumberFormatException e) { dist = -1; } if (dist < 0) { reporter.error("post-processor", "Invalid smart matching edist distance"); reporter.printSummary(); return; } interlinkingTask.setDistance(dist); interlinkingTask.setHashFunc(func); } PostProcessor processor = new PostProcessor(project, pool, settings); if (cmd.hasOption("processor-step")) { for (String stepName : cmd.getOptionValues("processor-step")) { PostProcessorTask step = 
steps.get(stepName); if (step == null) { reporter.error("post-processor", "Unknown processor step: '" + stepName + "'"); reporter.printSummary(); return; } processor.register(step); } } else { processor.register(steps.values()); } if (printTraces == true) { model = pool.getModel(); final Stats stats = model.getStats(project); model.close(); processor.addListener(new PostProcessorListener() { private int commitCount = 0; private int bugCount = 0; @Override public void commit(PostProcessor proc) { commitCount++; reporter.note("post-processor", "status: Commit " + commitCount + "/" + stats.commitCount); } @Override public void bug(PostProcessor proc) { bugCount++; reporter.note("post-processor", "status: Bug " + bugCount + "/" + stats.bugCount); } }); } processor.process(); } catch (ParseException e) { reporter.error("post-processor", "Parsing failed: " + e.getMessage()); if (printTraces == true) { e.printStackTrace(); } } catch (ClassNotFoundException e) { reporter.error("post-processor", "Failed to create a database connection: " + e.getMessage()); if (printTraces == true) { e.printStackTrace(); } } catch (SQLException e) { reporter.error("post-processor", "Failed to create a database connection: " + e.getMessage()); if (printTraces == true) { e.printStackTrace(); } } catch (PostProcessorException e) { reporter.error("post-processor", "Post-Processor Error: " + e.getMessage()); if (printTraces == true) { e.printStackTrace(); } } finally { if (pool != null) { pool.close(); } } reporter.printSummary(true); }
From source file:at.ac.tuwien.inso.subcat.reporter.Reporter.java
public static void main(String[] args) { Options options = new Options(); options.addOption("h", "help", false, "Show this options"); options.addOption("d", "db", true, "The database to process (required)"); options.addOption("p", "project", true, "The project ID to process"); options.addOption("P", "list-projects", false, "List all registered projects"); options.addOption("C", "config", true, "A configuration file including reports"); options.addOption("F", "list-formats", false, "List all supported output formats"); options.addOption("f", "format", true, "Output format"); options.addOption("R", "list-reports", false, "List all report types"); options.addOption("r", "report", true, "Report type"); options.addOption("o", "output", true, "Output path"); options.addOption("c", "commit-dictionary", true, "The commit dictionary ID to use"); options.addOption("b", "bug-dictionary", true, "The bug dictionary ID to use"); options.addOption("D", "list-dictionaries", false, "List all dictionaries"); options.addOption("v", "verbose", false, "Show details"); at.ac.tuwien.inso.subcat.utility.Reporter errReporter = new at.ac.tuwien.inso.subcat.utility.Reporter( false);/*from w ww . j a v a 2 s. 
c om*/ Settings settings = new Settings(); boolean verbose = false; ModelPool pool = null; Model model = null; CommandLineParser parser = new PosixParser(); try { CommandLine cmd = parser.parse(options, args); verbose = cmd.hasOption("verbose"); if (cmd.hasOption("help")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("postprocessor", options); return; } if (cmd.hasOption("db") == false) { errReporter.error("reporter", "Option --db is required"); errReporter.printSummary(); return; } if (cmd.hasOption("config") == false) { errReporter.error("reporter", "Option --config is required"); errReporter.printSummary(); return; } Configuration config = new Configuration(); Parser configParser = new Parser(); try { configParser.parse(config, new File(cmd.getOptionValue("config"))); } catch (IOException e) { errReporter.error("reporter", "Could not read configuration file: " + e.getMessage()); errReporter.printSummary(); return; } catch (ParserException e) { errReporter.error("reporter", "Could not parse configuration file: " + e.getMessage()); errReporter.printSummary(); return; } if (cmd.hasOption("list-reports")) { int i = 1; for (ExporterConfig exconf : config.getExporterConfigs()) { System.out.println(" (" + i + ") " + exconf.getName()); i++; } return; } File dbf = new File(cmd.getOptionValue("db")); if (dbf.exists() == false || dbf.isFile() == false) { errReporter.error("reporter", "Invalid database file path"); errReporter.printSummary(); return; } pool = new ModelPool(cmd.getOptionValue("db"), 2); pool.setPrintTemplates(verbose); model = pool.getModel(); if (cmd.hasOption("list-formats")) { Reporter exporter = new Reporter(model); int i = 1; for (ReportWriter formatter : exporter.getWriters()) { System.out.println(" (" + i + ") " + formatter.getLabel()); i++; } return; } if (cmd.hasOption("list-projects")) { for (Project proj : model.getProjects()) { System.out.println(" " + proj.getId() + ": " + proj.getDate()); } return; } Integer projId = 
null; if (cmd.hasOption("project") == false) { errReporter.error("reporter", "Option --project is required"); errReporter.printSummary(); return; } else { try { projId = Integer.parseInt(cmd.getOptionValue("project")); } catch (NumberFormatException e) { errReporter.error("reporter", "Invalid project ID"); errReporter.printSummary(); return; } } if (cmd.hasOption("output") == false) { errReporter.error("reporter", "Option --output is required"); errReporter.printSummary(); return; } String outputPath = cmd.getOptionValue("output"); model = pool.getModel(); Project project = model.getProject(projId); if (project == null) { errReporter.error("reporter", "Invalid project ID"); errReporter.printSummary(); return; } if (cmd.hasOption("list-dictionaries")) { List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project); for (at.ac.tuwien.inso.subcat.model.Dictionary dict : dictionaries) { System.out .println(" (" + dict.getId() + ") " + " " + dict.getContext() + " " + dict.getName()); } return; } int bugDictId = -1; if (cmd.hasOption("bug-dictionary")) { try { bugDictId = Integer.parseInt(cmd.getOptionValue("bug-dictionary")); List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project); boolean valid = false; for (at.ac.tuwien.inso.subcat.model.Dictionary dict : dictionaries) { if (dict.getId() == bugDictId) { valid = true; break; } } if (valid == false) { errReporter.error("reporter", "Invalid bug dictionary ID"); errReporter.printSummary(); return; } } catch (NumberFormatException e) { errReporter.error("reporter", "Invalid bug dictionary ID"); errReporter.printSummary(); return; } } int commitDictId = -1; if (cmd.hasOption("commit-dictionary")) { try { commitDictId = Integer.parseInt(cmd.getOptionValue("commit-dictionary")); List<at.ac.tuwien.inso.subcat.model.Dictionary> dictionaries = model.getDictionaries(project); boolean valid = false; for (at.ac.tuwien.inso.subcat.model.Dictionary dict : 
dictionaries) { if (dict.getId() == commitDictId) { valid = true; break; } } if (valid == false) { errReporter.error("reporter", "Invalid commit dictionary ID"); errReporter.printSummary(); return; } } catch (NumberFormatException e) { errReporter.error("reporter", "Invalid commit dictionary ID"); errReporter.printSummary(); return; } } if (cmd.hasOption("format") == false) { errReporter.error("reporter", "Option --format is required"); errReporter.printSummary(); return; } Reporter exporter = new Reporter(model); ReportWriter writer = null; try { int id = Integer.parseInt(cmd.getOptionValue("format")); if (id < 1 || id > exporter.getWriters().size()) { errReporter.error("reporter", "Invalid output format"); errReporter.printSummary(); return; } writer = exporter.getWriters().get(id - 1); } catch (NumberFormatException e) { errReporter.error("reporter", "Invalid output format"); errReporter.printSummary(); return; } ExporterConfig exporterConfig = null; if (cmd.hasOption("report") == false) { errReporter.error("reporter", "Option --report is required"); errReporter.printSummary(); return; } else { try { int id = Integer.parseInt(cmd.getOptionValue("report")); if (id < 1 || id > config.getExporterConfigs().size()) { errReporter.error("reporter", "Invalid reporter ID"); errReporter.printSummary(); return; } exporterConfig = config.getExporterConfigs().get(id - 1); } catch (NumberFormatException e) { errReporter.error("reporter", "Invalid reporter ID"); errReporter.printSummary(); return; } } exporter.export(exporterConfig, project, commitDictId, bugDictId, settings, writer, outputPath); } catch (ParseException e) { errReporter.error("reporter", "Parsing failed: " + e.getMessage()); if (verbose == true) { e.printStackTrace(); } } catch (ClassNotFoundException e) { errReporter.error("reporter", "Failed to create a database connection: " + e.getMessage()); if (verbose == true) { e.printStackTrace(); } } catch (SQLException e) { errReporter.error("reporter", "Failed to 
create a database connection: " + e.getMessage()); if (verbose == true) { e.printStackTrace(); } } catch (ReporterException e) { errReporter.error("reporter", "Reporter Error: " + e.getMessage()); if (verbose == true) { e.printStackTrace(); } } finally { if (model != null) { model.close(); } if (pool != null) { pool.close(); } } errReporter.printSummary(); }
From source file:client.MultiplexingClient.java
/**
 * Command-line entry point of the multiplexing client.
 *
 * <p>Parses the options, selects the channel factory (SSL when {@code -s} is given,
 * plain otherwise), reads the connection count, packet count and maximum packet size
 * (defaults 1, 20 and 4096) and then opens that many connections to the
 * {@code host:port} given as the first positional argument.
 *
 * <p>Usage is shown, and nothing is started, when {@code -h} is given, when the
 * positional argument is missing or not of the form {@code host:port}, or when one of
 * the numeric options is not a valid integer.
 *
 * @param args the raw command-line arguments
 * @throws Exception if option parsing or client start-up fails
 */
public static void main(String[] args) throws Exception {
    // Prepare to parse the command line
    Options options = new Options();
    Option sslOpt = new Option("s", "ssl", false, "Use SSL");
    Option debugOpt = new Option("d", true,
            "Debug level (NONE, FINER, FINE, CONFIG, INFO, WARNING, SEVERE. Default INFO.");
    Option numConnectionsOpt = new Option("n", true, "Number of connections to establish. [Default: 1]");
    Option numPcktOpt = new Option("p", true, "Number of packets to send in each connection. [Default: 20]");
    Option pcktMaxSizeOpt = new Option("m", true, "Maximum size of packets. [Default: 4096]");
    Option help = new Option("h", "print this message");

    options.addOption(help);
    options.addOption(debugOpt);
    options.addOption(numConnectionsOpt);
    options.addOption(numPcktOpt);
    options.addOption(pcktMaxSizeOpt);
    options.addOption(sslOpt);

    CommandLineParser parser = new PosixParser();
    // parse the command line arguments
    CommandLine line = parser.parse(options, args);

    if (line.hasOption(help.getOpt()) || line.getArgs().length < 1) {
        showUsage(options);
        return;
    }

    if (line.hasOption(sslOpt.getOpt())) {
        channelFactory = new SSLChannelFactory(true, TRUSTSTORE, TRUSTSTORE_PASSWORD);
    } else {
        channelFactory = new PlainChannelFactory();
    }

    // A malformed number is a usage error, handled the same way as a malformed
    // host:port below, instead of escaping as an uncaught NumberFormatException.
    try {
        connectionCount = parseIntOption(line, numConnectionsOpt, 1);
        packetsToSend = parseIntOption(line, numPcktOpt, 20);
        maxPcktSize = parseIntOption(line, pcktMaxSizeOpt, 4096);
    } catch (NumberFormatException e) {
        System.err.println("Invalid numeric option value: " + e.getMessage());
        showUsage(options);
        return;
    }

    String endpoint = line.getArgs()[0];
    int colonIndex = endpoint.indexOf(':');
    if (colonIndex < 0) {
        // No port separator: check explicitly rather than relying on substring() failing.
        showUsage(options);
        return;
    }

    InetSocketAddress remotePoint;
    try {
        remotePoint = new InetSocketAddress(endpoint.substring(0, colonIndex),
                Integer.parseInt(endpoint.substring(colonIndex + 1)));
    } catch (IllegalArgumentException e) {
        // Covers a non-numeric port (NumberFormatException) and an out-of-range port.
        showUsage(options);
        return;
    }

    // Setups the logging context for Log4j
    // NDC.push(Thread.currentThread().getName());
    st = new SelectorThread();
    for (int i = 0; i < connectionCount; i++) {
        new MultiplexingClient(remotePoint);
        // Must sleep for a while between opening connections in order
        // to give the remote host enough time to handle them. Otherwise,
        // the remote host backlog will get full and the connection
        // attempts will start to be refused.
        Thread.sleep(100);
    }
}

/**
 * Returns the integer value of {@code opt} on the parsed command line, or
 * {@code defaultValue} when the option was not given.
 *
 * @throws NumberFormatException if the option is present but its value is not an integer
 */
private static int parseIntOption(CommandLine line, Option opt, int defaultValue) {
    if (!line.hasOption(opt.getOpt())) {
        return defaultValue;
    }
    return Integer.parseInt(line.getOptionValue(opt.getOpt()));
}
From source file:com.google.flightmap.parsing.faa.nasr.CommParser.java
public static void main(String args[]) { CommandLine line = null;//from w ww . j a v a2s .c o m try { final CommandLineParser parser = new PosixParser(); line = parser.parse(OPTIONS, args); } catch (ParseException pEx) { System.err.println(pEx.getMessage()); printHelp(line); System.exit(1); } if (line.hasOption(HELP_OPTION)) { printHelp(line); System.exit(0); } final String twrFile = line.getOptionValue(TWR_OPTION); final String iataToIcaoFile = line.getOptionValue(IATA_TO_ICAO_OPTION); final String freqUsesNormalizationFile = line.getOptionValue(FREQ_USES_NORMALIZATION_OPTION); final String dbFile = line.getOptionValue(AVIATION_DB_OPTION); try { (new CommParser(twrFile, iataToIcaoFile, freqUsesNormalizationFile, dbFile)).execute(); } catch (Exception ex) { ex.printStackTrace(); System.exit(1); } }
From source file:edu.msu.cme.rdp.classifier.comparison.ComparisonCmd.java
public static void main(String[] args) throws Exception { String queryFile1 = null;/*from w w w . j av a 2 s . co m*/ String queryFile2 = null; String class_outputFile = null; String compare_outputFile = null; String propFile = null; ClassificationResultFormatter.FORMAT format = CmdOptions.DEFAULT_FORMAT; float conf_cutoff = CmdOptions.DEFAULT_CONF; String gene = null; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption(QUERYFILE1_SHORT_OPT)) { queryFile1 = line.getOptionValue(QUERYFILE1_SHORT_OPT); } else { throw new Exception("queryFile1 must be specified"); } if (line.hasOption(QUERYFILE2_SHORT_OPT)) { queryFile2 = line.getOptionValue(QUERYFILE2_SHORT_OPT); } else { throw new Exception("queryFile2 must be specified"); } if (line.hasOption(CmdOptions.OUTFILE_SHORT_OPT)) { class_outputFile = line.getOptionValue(CmdOptions.OUTFILE_SHORT_OPT); } else { throw new Exception("outputFile for classification results must be specified"); } if (line.hasOption(COMPARE_OUTFILE_SHORT_OPT)) { compare_outputFile = line.getOptionValue(COMPARE_OUTFILE_SHORT_OPT); } else { throw new Exception("outputFile for comparsion results must be specified"); } if (line.hasOption(CmdOptions.TRAINPROPFILE_SHORT_OPT)) { if (gene != null) { throw new IllegalArgumentException( "Already specified the gene from the default location. 
Can not specify train_propfile"); } else { propFile = line.getOptionValue(CmdOptions.TRAINPROPFILE_SHORT_OPT); } } if (line.hasOption(CmdOptions.BOOTSTRAP_SHORT_OPT)) { conf_cutoff = Float.parseFloat(line.getOptionValue(CmdOptions.BOOTSTRAP_SHORT_OPT)); } if (line.hasOption(CmdOptions.FORMAT_SHORT_OPT)) { String f = line.getOptionValue(CmdOptions.FORMAT_SHORT_OPT); if (f.equalsIgnoreCase("allrank")) { format = ClassificationResultFormatter.FORMAT.allRank; } else if (f.equalsIgnoreCase("fixrank")) { format = ClassificationResultFormatter.FORMAT.fixRank; } else if (f.equalsIgnoreCase("filterbyconf")) { format = ClassificationResultFormatter.FORMAT.filterbyconf; } else if (f.equalsIgnoreCase("db")) { format = ClassificationResultFormatter.FORMAT.dbformat; } else { throw new IllegalArgumentException( "Not valid output format, only allrank, fixrank, filterbyconf and db allowed"); } } if (line.hasOption(CmdOptions.GENE_SHORT_OPT)) { if (propFile != null) { throw new IllegalArgumentException( "Already specified train_propfile. Can not specify gene any more"); } gene = line.getOptionValue(CmdOptions.GENE_SHORT_OPT).toLowerCase(); if (!gene.equals(ClassifierFactory.RRNA_16S_GENE) && !gene.equals(ClassifierFactory.FUNGALLSU_GENE)) { throw new IllegalArgumentException(gene + " is NOT valid, only allows " + ClassifierFactory.RRNA_16S_GENE + " and " + ClassifierFactory.FUNGALLSU_GENE); } } } catch (Exception e) { System.out.println("Command Error: " + e.getMessage()); new HelpFormatter().printHelp(120, "ComparisonCmd", "", options, "", true); return; } if (propFile == null && gene == null) { gene = ClassifierFactory.RRNA_16S_GENE; } ComparisonCmd cmd = new ComparisonCmd(propFile, gene); cmd.setConfidenceCutoff(conf_cutoff); printLicense(); cmd.doClassify(queryFile1, queryFile2, class_outputFile, compare_outputFile, format); }