List of usage examples for org.apache.commons.lang3.tuple.Pair#getLeft()
public abstract L getLeft();
Gets the left element from this pair.
When treated as a key-value pair, this is the key.
From source file:com.act.lcms.MassCalculator2.java
/**
 * Command-line entry point: gathers InChI strings from an optional input file and from the
 * positional arguments, then writes a tab-separated table of InChI, mass and charge to the
 * output file option if given, otherwise to standard output.
 *
 * @param args command-line arguments, parsed by {@code CLI_UTIL}
 * @throws Exception if argument parsing or file I/O fails
 */
public static void main(String[] args) throws Exception {
    CommandLine commandLine = CLI_UTIL.parseCommandLine(args);

    if (commandLine.hasOption(OPTION_LICENSE_FILE)) {
        String licensePath = commandLine.getOptionValue(OPTION_LICENSE_FILE);
        LOGGER.info("Using license file at %s", licensePath);
        LicenseManager.setLicenseFile(licensePath);
    }

    // File-sourced InChIs come first, one per line, followed by any given as positional arguments.
    List<String> inchis = new ArrayList<>();
    if (commandLine.hasOption(OPTION_INPUT_FILE)) {
        try (BufferedReader input = new BufferedReader(
                new FileReader(commandLine.getOptionValue(OPTION_INPUT_FILE)))) {
            for (String row = input.readLine(); row != null; row = input.readLine()) {
                inchis.add(row);
            }
        }
    }

    if (!commandLine.getArgList().isEmpty()) {
        LOGGER.info("Reading %d InChIs from the command line", commandLine.getArgList().size());
        inchis.addAll(commandLine.getArgList());
    }

    try (PrintWriter table = new PrintWriter(
            commandLine.hasOption(OPTION_OUTPUT_FILE)
                    ? new FileWriter(commandLine.getOptionValue(OPTION_OUTPUT_FILE))
                    : new OutputStreamWriter(System.out))) {
        table.format("InChI\tMass\tCharge\n");
        for (String inchi : inchis) {
            Pair<Double, Integer> result;
            try {
                result = calculateMassAndCharge(inchi);
            } catch (MolFormatException e) {
                // A malformed structure is logged and skipped; the remaining InChIs are still processed.
                LOGGER.error("Unable to compute mass for %s: %s", inchi, e.getMessage());
                continue;
            }
            table.format("%s\t%.6f\t%3d\n", inchi, result.getLeft(), result.getRight());
        }
    }
}
From source file:com.act.lcms.db.io.LoadConstructAnalysisTableIntoDB.java
public static void main(String[] args) throws Exception { Options opts = new Options(); opts.addOption(Option.builder("i").argName("path").desc("The TSV file to read").hasArg().required() .longOpt("input-file").build()); // DB connection options. opts.addOption(Option.builder().argName("database url") .desc("The url to use when connecting to the LCMS db").hasArg().longOpt("db-url").build()); opts.addOption(Option.builder("u").argName("database user").desc("The LCMS DB user").hasArg() .longOpt("db-user").build()); opts.addOption(Option.builder("p").argName("database password").desc("The LCMS DB password").hasArg() .longOpt("db-pass").build()); opts.addOption(Option.builder("H").argName("database host") .desc(String.format("The LCMS DB host (default = %s)", DB.DEFAULT_HOST)).hasArg().longOpt("db-host") .build());/*from w ww .j a v a 2 s.c o m*/ opts.addOption(Option.builder("P").argName("database port") .desc(String.format("The LCMS DB port (default = %d)", DB.DEFAULT_PORT)).hasArg().longOpt("db-port") .build()); opts.addOption(Option.builder("N").argName("database name") .desc(String.format("The LCMS DB name (default = %s)", DB.DEFAULT_DB_NAME)).hasArg() .longOpt("db-name").build()); // Everybody needs a little help from their friends. 
opts.addOption( Option.builder("h").argName("help").desc("Prints this help message").longOpt("help").build()); CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } if (cl.hasOption("help")) { new HelpFormatter().printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); return; } File inputFile = new File(cl.getOptionValue("input-file")); if (!inputFile.exists()) { System.err.format("Unable to find input file at %s\n", cl.getOptionValue("input-file")); new HelpFormatter().printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } DB db; if (cl.hasOption("db-url")) { db = new DB().connectToDB(cl.getOptionValue("db-url")); } else { Integer port = null; if (cl.getOptionValue("P") != null) { port = Integer.parseInt(cl.getOptionValue("P")); } db = new DB().connectToDB(cl.getOptionValue("H"), port, cl.getOptionValue("N"), cl.getOptionValue("u"), cl.getOptionValue("p")); } try { db.getConn().setAutoCommit(false); ConstructAnalysisFileParser parser = new ConstructAnalysisFileParser(); parser.parse(inputFile); List<Pair<Integer, DB.OPERATION_PERFORMED>> results = ChemicalAssociatedWithPathway .insertOrUpdateChemicalsAssociatedWithPathwayFromParser(db, parser); if (results != null) { for (Pair<Integer, DB.OPERATION_PERFORMED> r : results) { System.out.format("%d: %s\n", r.getLeft(), r.getRight()); } } // If we didn't encounter an exception, commit the transaction. db.getConn().commit(); } catch (Exception e) { System.err.format("Caught exception when trying to load plate composition, rolling back. %s\n", e.getMessage()); db.getConn().rollback(); throw (e); } finally { db.getConn().close(); } }
From source file:com.act.lcms.CompareTwoNetCDFAroundMass.java
public static void main(String[] args) throws Exception { if (args.length < 5 || !areNCFiles(Arrays.copyOfRange(args, 3, args.length))) { throw new RuntimeException("Needs: \n" + "(1) mass value, e.g., 132.0772 for debugging, \n" + "(2) how many timepoints to process (-1 for all), \n" + "(3) prefix for .data and rendered .pdf \n" + "(4,5..) 2 or more NetCDF .nc files"); }/*from w ww. j a va 2 s. c o m*/ String fmt = "pdf"; Double mz = Double.parseDouble(args[0]); Integer numSpectraToProcess = Integer.parseInt(args[1]); String outPrefix = args[2]; String outImg = outPrefix.equals("-") ? null : outPrefix + "." + fmt; String outData = outPrefix.equals("-") ? null : outPrefix + ".data"; CompareTwoNetCDFAroundMass c = new CompareTwoNetCDFAroundMass(); String[] netCDF_fnames = Arrays.copyOfRange(args, 3, args.length); List<List<Pair<Double, Double>>> spectra = c.getSpectraForMass(mz, netCDF_fnames, numSpectraToProcess); // Write data output to outfile PrintStream out = outData == null ? System.out : new PrintStream(new FileOutputStream(outData)); // print out the spectra to outData for (List<Pair<Double, Double>> spectraInFile : spectra) { for (Pair<Double, Double> xy : spectraInFile) { out.format("%.4f\t%.4f\n", xy.getLeft(), xy.getRight()); out.flush(); } // delimit this dataset from the rest out.print("\n\n"); } // find the ymax across all spectra, so that we can have a uniform y scale Double yrange = 0.0; for (List<Pair<Double, Double>> spectraInFile : spectra) { Double ymax = 0.0; for (Pair<Double, Double> xy : spectraInFile) { Double intensity = xy.getRight(); if (ymax < intensity) ymax = intensity; } if (yrange < ymax) yrange = ymax; } if (outData != null) { // if outData is != null, then we have written to .data file // now render the .data to the corresponding .pdf file // first close the .data out.close(); // render outData to outFILE using gnuplo Gnuplotter plotter = new Gnuplotter(); plotter.plot2D(outData, outImg, netCDF_fnames, "time in seconds", yrange, 
"intensity", fmt); } }
From source file:com.act.lcms.MassCalculator.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.format("Usage: %s [InChI [...]]\n", MassCalculator.class.getCanonicalName()); return;//from w ww. j a v a2 s. c o m } System.out.format("InChI\tMass\tCharge\n"); for (String arg : args) { Pair<Double, Integer> massCharge = calculateMassAndCharge(arg); System.out.format("%s\t%.6f\t%d\n", arg, massCharge.getLeft(), massCharge.getRight()); } }
From source file:com.todopl.foreign.msexchange.MSExchange.java
public static void main(String[] args) { try {//from ww w .jav a2 s . c o m Date from = new Date(); Date to = DateUtils.addDays(from, 5); System.out.println("Appointments from today " + from.toString()); Pair<String, FindItemsResults<Appointment>> result = getEvents(null, from, to, "USER", "PASS", "EMAIL"); System.out.println("This is your version: " + result.getLeft()); System.out.println("These are your appointments:"); System.out.println(); for (Appointment a : result.getRight().getItems()) { System.out.println("Subject==== " + a.getSubject()); System.out.println("From======= " + a.getStart()); System.out.println("To========= " + a.getEnd()); System.out.println("Id========= " + a.getId()); System.out.println(); } System.out.println("Now with the version filled in:"); Pair<String, FindItemsResults<Appointment>> result2 = getEvents(result.getLeft(), from, to, "USER", "PASSWD", "EMAIL"); System.out.println("This is your version: " + result2.getLeft().toString()); System.out.println("These are your appointments:"); System.out.println(); for (Appointment a : result2.getRight().getItems()) { System.out.println("Subject==== " + a.getSubject()); System.out.println("From======= " + a.getStart()); System.out.println("To========= " + a.getEnd()); System.out.println(); } } catch (Exception e) { e.printStackTrace(); } }
From source file:de.fuberlin.agcsw.svont.changedetection.smartcex.PartitionEL.java
public static void main(String[] args) { if (args.length != 4) { usage();//from www.ja v a2s. c om } String compatibilityModeStr = args[0]; ReasonerCompatibilityMode compatiblilityMode = null; try { compatiblilityMode = ReasonerCompatibilityMode.valueOf(compatibilityModeStr); } catch (IllegalArgumentException e) { usage(); } try { Pair<OWLOntology, OWLOntology> result = partition(IRI.create(new File(args[1])), compatiblilityMode); result.getLeft().getOWLOntologyManager().saveOntology(result.getLeft(), IRI.create(new File(args[2]))); result.getRight().getOWLOntologyManager().saveOntology(result.getRight(), IRI.create(new File(args[3]))); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.act.lcms.db.io.LoadTSVIntoDB.java
public static void main(String[] args) throws Exception { Options opts = new Options(); opts.addOption(Option.builder("t").argName("type") .desc("The type of TSV data to read, options are: " + StringUtils.join(TSV_TYPE.values(), ", ")) .hasArg().required().longOpt("table-type").build()); opts.addOption(Option.builder("i").argName("path").desc("The TSV file to read").hasArg().required() .longOpt("input-file").build()); // DB connection options. opts.addOption(Option.builder().argName("database url") .desc("The url to use when connecting to the LCMS db").hasArg().longOpt("db-url").build()); opts.addOption(Option.builder("u").argName("database user").desc("The LCMS DB user").hasArg() .longOpt("db-user").build()); opts.addOption(Option.builder("p").argName("database password").desc("The LCMS DB password").hasArg() .longOpt("db-pass").build()); opts.addOption(Option.builder("H").argName("database host") .desc(String.format("The LCMS DB host (default = %s)", DB.DEFAULT_HOST)).hasArg().longOpt("db-host") .build());//from w ww. j av a 2 s .c o m opts.addOption(Option.builder("P").argName("database port") .desc(String.format("The LCMS DB port (default = %d)", DB.DEFAULT_PORT)).hasArg().longOpt("db-port") .build()); opts.addOption(Option.builder("N").argName("database name") .desc(String.format("The LCMS DB name (default = %s)", DB.DEFAULT_DB_NAME)).hasArg() .longOpt("db-name").build()); // Everybody needs a little help from their friends. 
opts.addOption( Option.builder("h").argName("help").desc("Prints this help message").longOpt("help").build()); CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } if (cl.hasOption("help")) { new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); return; } File inputFile = new File(cl.getOptionValue("input-file")); if (!inputFile.exists()) { System.err.format("Unable to find input file at %s\n", cl.getOptionValue("input-file")); new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } TSV_TYPE contentType = null; try { contentType = TSV_TYPE.valueOf(cl.getOptionValue("table-type")); } catch (IllegalArgumentException e) { System.err.format("Unrecognized TSV type '%s'\n", cl.getOptionValue("table-type")); new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } DB db; if (cl.hasOption("db-url")) { db = new DB().connectToDB(cl.getOptionValue("db-url")); } else { Integer port = null; if (cl.getOptionValue("P") != null) { port = Integer.parseInt(cl.getOptionValue("P")); } db = new DB().connectToDB(cl.getOptionValue("H"), port, cl.getOptionValue("N"), cl.getOptionValue("u"), cl.getOptionValue("p")); } try { db.getConn().setAutoCommit(false); TSVParser parser = new TSVParser(); parser.parse(inputFile); List<Pair<Integer, DB.OPERATION_PERFORMED>> results = null; switch (contentType) { case CURATED_CHEMICAL: results = CuratedChemical.insertOrUpdateCuratedChemicalsFromTSV(db, parser); break; case CONSTRUCT: results = ConstructEntry.insertOrUpdateCompositionMapEntriesFromTSV(db, parser); break; case CHEMICAL_OF_INTEREST: results = 
ChemicalOfInterest.insertOrUpdateChemicalOfInterestsFromTSV(db, parser); break; default: throw new RuntimeException(String.format("Unsupported TSV type: %s", contentType)); } if (results != null) { for (Pair<Integer, DB.OPERATION_PERFORMED> r : results) { System.out.format("%d: %s\n", r.getLeft(), r.getRight()); } } // If we didn't encounter an exception, commit the transaction. db.getConn().commit(); } catch (Exception e) { System.err.format("Caught exception when trying to load plate composition, rolling back. %s\n", e.getMessage()); db.getConn().rollback(); throw (e); } finally { db.getConn().close(); } }
From source file:com.act.biointerpretation.cofactorremoval.ReactionCofactorRemover.java
public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());// w w w. ja va2 s . co m } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(ReactionDesalter.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } NoSQLAPI api = new NoSQLAPI(cl.getOptionValue(OPTION_READ_DB), cl.getOptionValue(OPTION_READ_DB)); CofactorRemover cofactorRemover = new CofactorRemover(api); cofactorRemover.init(); Pair<Reaction, Reaction> results = cofactorRemover .removeCofactorsFromOneReaction(Long.parseLong(cl.getOptionValue(OPTION_RXN_ID))); System.out.format("Reaction before processing:\n"); printReport(results.getLeft()); System.out.println(); System.out.format("Reaction after processing:\n"); printReport(results.getRight()); System.out.println(); }
From source file:act.installer.pubchem.PubchemSynonymFinder.java
public static void main(String[] args) throws Exception { org.apache.commons.cli.Options opts = new org.apache.commons.cli.Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());/* w w w . j a v a 2 s .c om*/ } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File rocksDBFile = new File(cl.getOptionValue(OPTION_INDEX_PATH)); if (!rocksDBFile.isDirectory()) { System.err.format("Index directory does not exist or is not a directory at '%s'", rocksDBFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } List<String> compoundIds = null; if (cl.hasOption(OPTION_PUBCHEM_COMPOUND_ID)) { compoundIds = Collections.singletonList(cl.getOptionValue(OPTION_PUBCHEM_COMPOUND_ID)); } else if (cl.hasOption(OPTION_IDS_FILE)) { File idsFile = new File(cl.getOptionValue(OPTION_IDS_FILE)); if (!idsFile.exists()) { System.err.format("Cannot find Pubchem CIDs file at %s", idsFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } compoundIds = getCIDsFromFile(idsFile); if (compoundIds.size() == 0) { System.err.format("Found zero Pubchem CIDs to process in file at '%s', exiting", idsFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } } else { System.err.format("Must specify one of '%s' or '%s'; index is too big to print all synonyms.", OPTION_PUBCHEM_COMPOUND_ID, 
OPTION_IDS_FILE); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } // Run a quick check to warn users of malformed ids. compoundIds.forEach(x -> { if (!PC_CID_PATTERN.matcher(x).matches()) { // Use matches() for complete matching. LOGGER.warn("Specified compound id does not match expected format: %s", x); } }); LOGGER.info("Opening DB and searching for %d Pubchem CIDs", compoundIds.size()); Pair<RocksDB, Map<PubchemTTLMerger.COLUMN_FAMILIES, ColumnFamilyHandle>> dbAndHandles = null; Map<String, PubchemSynonyms> results = new LinkedHashMap<>(compoundIds.size()); try { dbAndHandles = PubchemTTLMerger.openExistingRocksDB(rocksDBFile); RocksDB db = dbAndHandles.getLeft(); ColumnFamilyHandle cidToSynonymsCfh = dbAndHandles.getRight() .get(PubchemTTLMerger.COLUMN_FAMILIES.CID_TO_SYNONYMS); for (String cid : compoundIds) { PubchemSynonyms synonyms = null; byte[] val = db.get(cidToSynonymsCfh, cid.getBytes(UTF8)); if (val != null) { ObjectInputStream oi = new ObjectInputStream(new ByteArrayInputStream(val)); // We're relying on our use of a one-value-type per index model here so we can skip the instanceof check. synonyms = (PubchemSynonyms) oi.readObject(); } else { LOGGER.warn("No synonyms available for compound id '%s'", cid); } results.put(cid, synonyms); } } finally { if (dbAndHandles != null) { dbAndHandles.getLeft().close(); } } try (OutputStream outputStream = cl.hasOption(OPTION_OUTPUT) ? new FileOutputStream(cl.getOptionValue(OPTION_OUTPUT)) : System.out) { OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValue(outputStream, results); new OutputStreamWriter(outputStream).append('\n'); } LOGGER.info("Done searching for Pubchem synonyms"); }
From source file:com.twentyn.patentSearch.DocumentSearch.java
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();/* w ww . j ava2 s . co m*/ Options opts = new Options(); opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read") .build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build()); opts.addOption( Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build()); opts.addOption(Option.builder("l").longOpt("list-file").hasArg() .desc("A file containing a list of queries to run in sequence").build()); opts.addOption( Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build()); opts.addOption(Option.builder("d").longOpt("dump").hasArg() .desc("Dump terms in the document index for a specified field").build()); opts.addOption( Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build()); opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg() .desc("The index of the InChI field if an input TSV is specified.").build()); opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg() .desc("The index of the chemical synonym field if an input TSV is specified.").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if 
(!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field") && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) { System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}"); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY); LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); IndexReader indexReader = DirectoryReader.open(indexDir);) { if (cmdLine.hasOption("enumerate")) { /* Enumerate all documents in the index. * With help from * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */ for (int i = 0; i < indexReader.maxDoc(); i++) { Document doc = indexReader.document(i); LOGGER.info("Doc " + i + ":"); LOGGER.info(doc); } } else if (cmdLine.hasOption("dump")) { /* Dump indexed terms for a specific field. 
* With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */ Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump")); LOGGER.info("Has positions: " + terms.hasPositions()); LOGGER.info("Has offsets: " + terms.hasOffsets()); LOGGER.info("Has freqs: " + terms.hasFreqs()); LOGGER.info("Stats: " + terms.getStats()); LOGGER.info(terms); TermsEnum termsEnum = terms.iterator(); BytesRef br = null; while ((br = termsEnum.next()) != null) { LOGGER.info(" " + br.utf8ToString()); } } else { IndexSearcher searcher = new IndexSearcher(indexReader); String field = cmdLine.getOptionValue("field"); List<Pair<String, String>> queries = null; if (cmdLine.hasOption("query")) { queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query"))); } else if (cmdLine.hasOption("list-file")) { if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) { LOGGER.error("Must specify both inchi-field and synonym-field when using list-file."); System.exit(1); } Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field")); Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field")); queries = new LinkedList<>(); BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file"))); String line; while ((line = r.readLine()) != null) { line = line.trim(); if (!line.isEmpty()) { // TODO: use a proper TSV reader; this is intentionally terrible as is. 
String[] fields = line.split("\t"); queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField])); } } r.close(); } if (queries == null || queries.size() == 0) { LOGGER.error("Found no queries to run."); return; } List<SearchResult> searchResults = new ArrayList<>(queries.size()); for (Pair<String, String> queryPair : queries) { String inchi = queryPair.getLeft(); String rawQueryString = queryPair.getRight(); /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities * as query directives, which is not what we want at all. Phrase queries seem to work adequately * with the analyzer we're currently using. */ String queryString = rawQueryString.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); for (String p : parts) { query.add(new Term(field, p)); } LOGGER.info("Running query: " + query.toString()); BooleanQuery bq = new BooleanQuery(); bq.add(query, BooleanClause.Occur.MUST); bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD); LOGGER.info(" Full query: " + bq.toString()); TopDocs topDocs = searcher.search(bq, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs.length == 0) { LOGGER.info("Search returned no results."); } List<ResultDocument> results = new ArrayList<>(scoreDocs.length); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; Document doc = indexReader.document(scoreDoc.doc); LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": " + doc.get("id") + ", " + doc.get("title")); results.add(new ResultDocument(scoreDoc.doc, 
scoreDoc.score, doc.get("title"), doc.get("id"), null)); } LOGGER.info("----- Done with query " + query.toString()); // TODO: reduce memory usage when not writing results to an output file. searchResults.add(new SearchResult(inchi, rawQueryString, bq, results)); } if (cmdLine.hasOption("output")) { try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) { writer.write(objectMapper.writeValueAsString(searchResults)); } } } } }