List of usage examples for org.apache.commons.lang3.tuple Pair getRight
public abstract R getRight();
Gets the right element from this pair.
When treated as a key-value pair, this is the value.
From source file:com.act.lcms.MassCalculator2.java
public static void main(String[] args) throws Exception { CommandLine cl = CLI_UTIL.parseCommandLine(args); if (cl.hasOption(OPTION_LICENSE_FILE)) { LOGGER.info("Using license file at %s", cl.getOptionValue(OPTION_LICENSE_FILE)); LicenseManager.setLicenseFile(cl.getOptionValue(OPTION_LICENSE_FILE)); }/*from w w w . j a va 2s . c o m*/ List<String> inchis = new ArrayList<>(); if (cl.hasOption(OPTION_INPUT_FILE)) { try (BufferedReader reader = new BufferedReader(new FileReader(cl.getOptionValue(OPTION_INPUT_FILE)))) { String line; while ((line = reader.readLine()) != null) { inchis.add(line); } } } if (cl.getArgList().size() > 0) { LOGGER.info("Reading %d InChIs from the command line", cl.getArgList().size()); inchis.addAll(cl.getArgList()); } try (PrintWriter writer = new PrintWriter( cl.hasOption(OPTION_OUTPUT_FILE) ? new FileWriter(cl.getOptionValue(OPTION_OUTPUT_FILE)) : new OutputStreamWriter(System.out))) { writer.format("InChI\tMass\tCharge\n"); for (String inchi : inchis) { try { Pair<Double, Integer> massAndCharge = calculateMassAndCharge(inchi); writer.format("%s\t%.6f\t%3d\n", inchi, massAndCharge.getLeft(), massAndCharge.getRight()); } catch (MolFormatException e) { LOGGER.error("Unable to compute mass for %s: %s", inchi, e.getMessage()); } } } }
From source file:com.act.lcms.db.io.LoadConstructAnalysisTableIntoDB.java
public static void main(String[] args) throws Exception { Options opts = new Options(); opts.addOption(Option.builder("i").argName("path").desc("The TSV file to read").hasArg().required() .longOpt("input-file").build()); // DB connection options. opts.addOption(Option.builder().argName("database url") .desc("The url to use when connecting to the LCMS db").hasArg().longOpt("db-url").build()); opts.addOption(Option.builder("u").argName("database user").desc("The LCMS DB user").hasArg() .longOpt("db-user").build()); opts.addOption(Option.builder("p").argName("database password").desc("The LCMS DB password").hasArg() .longOpt("db-pass").build()); opts.addOption(Option.builder("H").argName("database host") .desc(String.format("The LCMS DB host (default = %s)", DB.DEFAULT_HOST)).hasArg().longOpt("db-host") .build());// w ww .j ava 2 s .co m opts.addOption(Option.builder("P").argName("database port") .desc(String.format("The LCMS DB port (default = %d)", DB.DEFAULT_PORT)).hasArg().longOpt("db-port") .build()); opts.addOption(Option.builder("N").argName("database name") .desc(String.format("The LCMS DB name (default = %s)", DB.DEFAULT_DB_NAME)).hasArg() .longOpt("db-name").build()); // Everybody needs a little help from their friends. opts.addOption( Option.builder("h").argName("help").desc("Prints this help message").longOpt("help").build()); CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } if (cl.hasOption("help")) { new HelpFormatter().printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); return; } File inputFile = new File(cl.getOptionValue("input-file")); if (!inputFile.exists()) { System.err.format("Unable to find input file at %s\n", cl.getOptionValue("input-file")); new HelpFormatter().printHelp(LoadConstructAnalysisTableIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } DB db; if (cl.hasOption("db-url")) { db = new DB().connectToDB(cl.getOptionValue("db-url")); } else { Integer port = null; if (cl.getOptionValue("P") != null) { port = Integer.parseInt(cl.getOptionValue("P")); } db = new DB().connectToDB(cl.getOptionValue("H"), port, cl.getOptionValue("N"), cl.getOptionValue("u"), cl.getOptionValue("p")); } try { db.getConn().setAutoCommit(false); ConstructAnalysisFileParser parser = new ConstructAnalysisFileParser(); parser.parse(inputFile); List<Pair<Integer, DB.OPERATION_PERFORMED>> results = ChemicalAssociatedWithPathway .insertOrUpdateChemicalsAssociatedWithPathwayFromParser(db, parser); if (results != null) { for (Pair<Integer, DB.OPERATION_PERFORMED> r : results) { System.out.format("%d: %s\n", r.getLeft(), r.getRight()); } } // If we didn't encounter an exception, commit the transaction. db.getConn().commit(); } catch (Exception e) { System.err.format("Caught exception when trying to load plate composition, rolling back. %s\n", e.getMessage()); db.getConn().rollback(); throw (e); } finally { db.getConn().close(); } }
From source file:com.act.lcms.CompareTwoNetCDFAroundMass.java
public static void main(String[] args) throws Exception { if (args.length < 5 || !areNCFiles(Arrays.copyOfRange(args, 3, args.length))) { throw new RuntimeException("Needs: \n" + "(1) mass value, e.g., 132.0772 for debugging, \n" + "(2) how many timepoints to process (-1 for all), \n" + "(3) prefix for .data and rendered .pdf \n" + "(4,5..) 2 or more NetCDF .nc files"); }/* w w w .j a v a 2 s. c om*/ String fmt = "pdf"; Double mz = Double.parseDouble(args[0]); Integer numSpectraToProcess = Integer.parseInt(args[1]); String outPrefix = args[2]; String outImg = outPrefix.equals("-") ? null : outPrefix + "." + fmt; String outData = outPrefix.equals("-") ? null : outPrefix + ".data"; CompareTwoNetCDFAroundMass c = new CompareTwoNetCDFAroundMass(); String[] netCDF_fnames = Arrays.copyOfRange(args, 3, args.length); List<List<Pair<Double, Double>>> spectra = c.getSpectraForMass(mz, netCDF_fnames, numSpectraToProcess); // Write data output to outfile PrintStream out = outData == null ? System.out : new PrintStream(new FileOutputStream(outData)); // print out the spectra to outData for (List<Pair<Double, Double>> spectraInFile : spectra) { for (Pair<Double, Double> xy : spectraInFile) { out.format("%.4f\t%.4f\n", xy.getLeft(), xy.getRight()); out.flush(); } // delimit this dataset from the rest out.print("\n\n"); } // find the ymax across all spectra, so that we can have a uniform y scale Double yrange = 0.0; for (List<Pair<Double, Double>> spectraInFile : spectra) { Double ymax = 0.0; for (Pair<Double, Double> xy : spectraInFile) { Double intensity = xy.getRight(); if (ymax < intensity) ymax = intensity; } if (yrange < ymax) yrange = ymax; } if (outData != null) { // if outData is != null, then we have written to .data file // now render the .data to the corresponding .pdf file // first close the .data out.close(); // render outData to outFILE using gnuplo Gnuplotter plotter = new Gnuplotter(); plotter.plot2D(outData, outImg, netCDF_fnames, "time in seconds", yrange, "intensity", fmt); } }
From source file:com.act.lcms.MassCalculator.java
public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.format("Usage: %s [InChI [...]]\n", MassCalculator.class.getCanonicalName()); return;//from www.j a va2s . co m } System.out.format("InChI\tMass\tCharge\n"); for (String arg : args) { Pair<Double, Integer> massCharge = calculateMassAndCharge(arg); System.out.format("%s\t%.6f\t%d\n", arg, massCharge.getLeft(), massCharge.getRight()); } }
From source file:com.act.lcms.db.io.LoadTSVIntoDB.java
public static void main(String[] args) throws Exception { Options opts = new Options(); opts.addOption(Option.builder("t").argName("type") .desc("The type of TSV data to read, options are: " + StringUtils.join(TSV_TYPE.values(), ", ")) .hasArg().required().longOpt("table-type").build()); opts.addOption(Option.builder("i").argName("path").desc("The TSV file to read").hasArg().required() .longOpt("input-file").build()); // DB connection options. opts.addOption(Option.builder().argName("database url") .desc("The url to use when connecting to the LCMS db").hasArg().longOpt("db-url").build()); opts.addOption(Option.builder("u").argName("database user").desc("The LCMS DB user").hasArg() .longOpt("db-user").build()); opts.addOption(Option.builder("p").argName("database password").desc("The LCMS DB password").hasArg() .longOpt("db-pass").build()); opts.addOption(Option.builder("H").argName("database host") .desc(String.format("The LCMS DB host (default = %s)", DB.DEFAULT_HOST)).hasArg().longOpt("db-host") .build());/*from ww w .ja v a 2 s.c o m*/ opts.addOption(Option.builder("P").argName("database port") .desc(String.format("The LCMS DB port (default = %d)", DB.DEFAULT_PORT)).hasArg().longOpt("db-port") .build()); opts.addOption(Option.builder("N").argName("database name") .desc(String.format("The LCMS DB name (default = %s)", DB.DEFAULT_DB_NAME)).hasArg() .longOpt("db-name").build()); // Everybody needs a little help from their friends. opts.addOption( Option.builder("h").argName("help").desc("Prints this help message").longOpt("help").build()); CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } if (cl.hasOption("help")) { new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); return; } File inputFile = new File(cl.getOptionValue("input-file")); if (!inputFile.exists()) { System.err.format("Unable to find input file at %s\n", cl.getOptionValue("input-file")); new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } TSV_TYPE contentType = null; try { contentType = TSV_TYPE.valueOf(cl.getOptionValue("table-type")); } catch (IllegalArgumentException e) { System.err.format("Unrecognized TSV type '%s'\n", cl.getOptionValue("table-type")); new HelpFormatter().printHelp(LoadTSVIntoDB.class.getCanonicalName(), opts, true); System.exit(1); } DB db; if (cl.hasOption("db-url")) { db = new DB().connectToDB(cl.getOptionValue("db-url")); } else { Integer port = null; if (cl.getOptionValue("P") != null) { port = Integer.parseInt(cl.getOptionValue("P")); } db = new DB().connectToDB(cl.getOptionValue("H"), port, cl.getOptionValue("N"), cl.getOptionValue("u"), cl.getOptionValue("p")); } try { db.getConn().setAutoCommit(false); TSVParser parser = new TSVParser(); parser.parse(inputFile); List<Pair<Integer, DB.OPERATION_PERFORMED>> results = null; switch (contentType) { case CURATED_CHEMICAL: results = CuratedChemical.insertOrUpdateCuratedChemicalsFromTSV(db, parser); break; case CONSTRUCT: results = ConstructEntry.insertOrUpdateCompositionMapEntriesFromTSV(db, parser); break; case CHEMICAL_OF_INTEREST: results = ChemicalOfInterest.insertOrUpdateChemicalOfInterestsFromTSV(db, parser); break; default: throw new RuntimeException(String.format("Unsupported TSV type: %s", contentType)); } if (results != null) { for (Pair<Integer, DB.OPERATION_PERFORMED> r : results) { System.out.format("%d: %s\n", r.getLeft(), r.getRight()); } } // If we didn't encounter an exception, commit the transaction. db.getConn().commit(); } catch (Exception e) { System.err.format("Caught exception when trying to load plate composition, rolling back. %s\n", e.getMessage()); db.getConn().rollback(); throw (e); } finally { db.getConn().close(); } }
From source file:de.fuberlin.agcsw.svont.changedetection.smartcex.PartitionEL.java
public static void main(String[] args) { if (args.length != 4) { usage();/*www . ja v a2 s. co m*/ } String compatibilityModeStr = args[0]; ReasonerCompatibilityMode compatiblilityMode = null; try { compatiblilityMode = ReasonerCompatibilityMode.valueOf(compatibilityModeStr); } catch (IllegalArgumentException e) { usage(); } try { Pair<OWLOntology, OWLOntology> result = partition(IRI.create(new File(args[1])), compatiblilityMode); result.getLeft().getOWLOntologyManager().saveOntology(result.getLeft(), IRI.create(new File(args[2]))); result.getRight().getOWLOntologyManager().saveOntology(result.getRight(), IRI.create(new File(args[3]))); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.todopl.foreign.msexchange.MSExchange.java
public static void main(String[] args) { try {/*from w w w . j a v a 2s.c o m*/ Date from = new Date(); Date to = DateUtils.addDays(from, 5); System.out.println("Appointments from today " + from.toString()); Pair<String, FindItemsResults<Appointment>> result = getEvents(null, from, to, "USER", "PASS", "EMAIL"); System.out.println("This is your version: " + result.getLeft()); System.out.println("These are your appointments:"); System.out.println(); for (Appointment a : result.getRight().getItems()) { System.out.println("Subject==== " + a.getSubject()); System.out.println("From======= " + a.getStart()); System.out.println("To========= " + a.getEnd()); System.out.println("Id========= " + a.getId()); System.out.println(); } System.out.println("Now with the version filled in:"); Pair<String, FindItemsResults<Appointment>> result2 = getEvents(result.getLeft(), from, to, "USER", "PASSWD", "EMAIL"); System.out.println("This is your version: " + result2.getLeft().toString()); System.out.println("These are your appointments:"); System.out.println(); for (Appointment a : result2.getRight().getItems()) { System.out.println("Subject==== " + a.getSubject()); System.out.println("From======= " + a.getStart()); System.out.println("To========= " + a.getEnd()); System.out.println(); } } catch (Exception e) { e.printStackTrace(); } }
From source file:com.act.biointerpretation.cofactorremoval.ReactionCofactorRemover.java
public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());// w w w.ja v a 2s . com } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(ReactionDesalter.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } NoSQLAPI api = new NoSQLAPI(cl.getOptionValue(OPTION_READ_DB), cl.getOptionValue(OPTION_READ_DB)); CofactorRemover cofactorRemover = new CofactorRemover(api); cofactorRemover.init(); Pair<Reaction, Reaction> results = cofactorRemover .removeCofactorsFromOneReaction(Long.parseLong(cl.getOptionValue(OPTION_RXN_ID))); System.out.format("Reaction before processing:\n"); printReport(results.getLeft()); System.out.println(); System.out.format("Reaction after processing:\n"); printReport(results.getRight()); System.out.println(); }
From source file:com.act.lcms.db.io.PrintConstructInfo.java
public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());/*www .j a v a 2s . c o m*/ } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY)); if (!lcmsDir.isDirectory()) { System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } try (DB db = DB.openDBFromCLI(cl)) { System.out.print("Loading/updating LCMS scan files into DB\n"); ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir); String construct = cl.getOptionValue(OPTION_CONSTRUCT); List<LCMSWell> lcmsWells = LCMSWell.getInstance().getByConstructID(db, construct); Collections.sort(lcmsWells, new Comparator<LCMSWell>() { @Override public int compare(LCMSWell o1, LCMSWell o2) { return o1.getId().compareTo(o2.getId()); } }); Set<String> uniqueMSIDs = new HashSet<>(); Map<Integer, Plate> platesById = new HashMap<>(); System.out.format("\n\n-- Construct %s --\n\n", construct); List<ChemicalAssociatedWithPathway> pathwayChems = ChemicalAssociatedWithPathway.getInstance() .getChemicalsAssociatedWithPathwayByConstructId(db, construct); System.out.print("Chemicals associated with pathway:\n"); System.out.format(" %-8s%-15s%-45s\n", "index", "kind", "chemical"); for (ChemicalAssociatedWithPathway chem : pathwayChems) { System.out.format(" %-8d%-15s%-45s\n", chem.getIndex(), chem.getKind(), chem.getChemical()); } System.out.print("\nLCMS wells:\n"); System.out.format(" %-15s%-6s%-15s%-15s%-15s\n", "barcode", "well", "msid", "fed", "lcms_count"); for (LCMSWell well : lcmsWells) { uniqueMSIDs.add(well.getMsid()); Plate p = platesById.get(well.getPlateId()); if (p == null) { // TODO: migrate Plate to be a subclass of BaseDBModel. p = Plate.getPlateById(db, well.getPlateId()); platesById.put(p.getId(), p); } String chem = well.getChemical(); List<ScanFile> scanFiles = ScanFile.getScanFileByPlateIDRowAndColumn(db, p.getId(), well.getPlateRow(), well.getPlateColumn()); System.out.format(" %-15s%-6s%-15s%-15s%-15d\n", p.getBarcode(), well.getCoordinatesString(), well.getMsid(), chem == null || chem.isEmpty() ? "--" : chem, scanFiles.size()); System.out.flush(); } List<Integer> plateIds = Arrays.asList(platesById.keySet().toArray(new Integer[platesById.size()])); Collections.sort(plateIds); System.out.print("\nAppears in plates:\n"); for (Integer id : plateIds) { Plate p = platesById.get(id); System.out.format(" %s: %s\n", p.getBarcode(), p.getName()); } List<String> msids = Arrays.asList(uniqueMSIDs.toArray(new String[uniqueMSIDs.size()])); Collections.sort(msids); System.out.format("\nMSIDS: %s\n", StringUtils.join(msids, ", ")); Set<String> availableNegativeControls = new HashSet<>(); for (Map.Entry<Integer, Plate> entry : platesById.entrySet()) { List<LCMSWell> wells = LCMSWell.getInstance().getByPlateId(db, entry.getKey()); for (LCMSWell well : wells) { if (!construct.equals(well.getComposition())) { availableNegativeControls.add(well.getComposition()); } } } // Print available standards for each step w/ plate barcodes and coordinates. System.out.format("\nAvailable Standards:\n"); Map<Integer, Plate> plateCache = new HashMap<>(); for (ChemicalAssociatedWithPathway chem : pathwayChems) { List<StandardWell> matchingWells = StandardWell.getInstance().getStandardWellsByChemical(db, chem.getChemical()); for (StandardWell well : matchingWells) { if (!plateCache.containsKey(well.getPlateId())) { Plate p = Plate.getPlateById(db, well.getPlateId()); plateCache.put(p.getId(), p); } } Map<Integer, List<StandardWell>> standardWellsByPlateId = new HashMap<>(); for (StandardWell well : matchingWells) { List<StandardWell> plateWells = standardWellsByPlateId.get(well.getPlateId()); if (plateWells == null) { plateWells = new ArrayList<>(); standardWellsByPlateId.put(well.getPlateId(), plateWells); } plateWells.add(well); } List<Pair<String, Integer>> plateBarcodes = new ArrayList<>(plateCache.size()); for (Plate p : plateCache.values()) { if (p.getBarcode() == null) { plateBarcodes.add(Pair.of("(no barcode)", p.getId())); } else { plateBarcodes.add(Pair.of(p.getBarcode(), p.getId())); } } Collections.sort(plateBarcodes); System.out.format(" %s:\n", chem.getChemical()); for (Pair<String, Integer> barcodePair : plateBarcodes) { // TODO: hoist this whole sorting/translation step into a utility class. List<StandardWell> wells = standardWellsByPlateId.get(barcodePair.getRight()); if (wells == null) { // Don't print plates that don't apply to this chemical, which can happen because we're caching the plates. continue; } Collections.sort(wells, new Comparator<StandardWell>() { @Override public int compare(StandardWell o1, StandardWell o2) { int c = o1.getPlateRow().compareTo(o2.getPlateRow()); if (c != 0) return c; return o1.getPlateColumn().compareTo(o2.getPlateColumn()); } }); List<String> descriptions = new ArrayList<>(wells.size()); for (StandardWell well : wells) { descriptions.add(String.format("%s in %s%s", well.getCoordinatesString(), well.getMedia(), well.getConcentration() == null ? "" : String.format(" c. %f", well.getConcentration()))); } System.out.format(" %s: %s\n", barcodePair.getLeft(), StringUtils.join(descriptions, ", ")); } } List<String> negativeControlStrains = Arrays .asList(availableNegativeControls.toArray(new String[availableNegativeControls.size()])); Collections.sort(negativeControlStrains); System.out.format("\nAvailable negative controls: %s\n", StringUtils.join(negativeControlStrains, ",")); System.out.print("\n----------\n"); System.out.print("\n\n"); } }
From source file:com.twentyn.patentSearch.DocumentSearch.java
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();//w w w .jav a 2 s . co m Options opts = new Options(); opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read") .build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build()); opts.addOption( Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build()); opts.addOption(Option.builder("l").longOpt("list-file").hasArg() .desc("A file containing a list of queries to run in sequence").build()); opts.addOption( Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build()); opts.addOption(Option.builder("d").longOpt("dump").hasArg() .desc("Dump terms in the document index for a specified field").build()); opts.addOption( Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build()); opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg() .desc("The index of the InChI field if an input TSV is specified.").build()); opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg() .desc("The index of the chemical synonym field if an input TSV is specified.").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field") && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) { System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}"); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY); LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); IndexReader indexReader = DirectoryReader.open(indexDir);) { if (cmdLine.hasOption("enumerate")) { /* Enumerate all documents in the index. * With help from * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */ for (int i = 0; i < indexReader.maxDoc(); i++) { Document doc = indexReader.document(i); LOGGER.info("Doc " + i + ":"); LOGGER.info(doc); } } else if (cmdLine.hasOption("dump")) { /* Dump indexed terms for a specific field. * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */ Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump")); LOGGER.info("Has positions: " + terms.hasPositions()); LOGGER.info("Has offsets: " + terms.hasOffsets()); LOGGER.info("Has freqs: " + terms.hasFreqs()); LOGGER.info("Stats: " + terms.getStats()); LOGGER.info(terms); TermsEnum termsEnum = terms.iterator(); BytesRef br = null; while ((br = termsEnum.next()) != null) { LOGGER.info(" " + br.utf8ToString()); } } else { IndexSearcher searcher = new IndexSearcher(indexReader); String field = cmdLine.getOptionValue("field"); List<Pair<String, String>> queries = null; if (cmdLine.hasOption("query")) { queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query"))); } else if (cmdLine.hasOption("list-file")) { if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) { LOGGER.error("Must specify both inchi-field and synonym-field when using list-file."); System.exit(1); } Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field")); Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field")); queries = new LinkedList<>(); BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file"))); String line; while ((line = r.readLine()) != null) { line = line.trim(); if (!line.isEmpty()) { // TODO: use a proper TSV reader; this is intentionally terrible as is. String[] fields = line.split("\t"); queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField])); } } r.close(); } if (queries == null || queries.size() == 0) { LOGGER.error("Found no queries to run."); return; } List<SearchResult> searchResults = new ArrayList<>(queries.size()); for (Pair<String, String> queryPair : queries) { String inchi = queryPair.getLeft(); String rawQueryString = queryPair.getRight(); /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities * as query directives, which is not what we want at all. Phrase queries seem to work adequately * with the analyzer we're currently using. */ String queryString = rawQueryString.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); for (String p : parts) { query.add(new Term(field, p)); } LOGGER.info("Running query: " + query.toString()); BooleanQuery bq = new BooleanQuery(); bq.add(query, BooleanClause.Occur.MUST); bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD); LOGGER.info(" Full query: " + bq.toString()); TopDocs topDocs = searcher.search(bq, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs.length == 0) { LOGGER.info("Search returned no results."); } List<ResultDocument> results = new ArrayList<>(scoreDocs.length); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; Document doc = indexReader.document(scoreDoc.doc); LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": " + doc.get("id") + ", " + doc.get("title")); results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"), doc.get("id"), null)); } LOGGER.info("----- Done with query " + query.toString()); // TODO: reduce memory usage when not writing results to an output file. searchResults.add(new SearchResult(inchi, rawQueryString, bq, results)); } if (cmdLine.hasOption("output")) { try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) { writer.write(objectMapper.writeValueAsString(searchResults)); } } } } }