List of usage examples for org.apache.commons.lang3.tuple Pair of
public static <L, R> Pair<L, R> of(final L left, final R right)
Obtains an immutable pair of from two objects inferring the generic types.
This factory allows the pair to be created using inference to obtain the generic types.
From source file:eu.crydee.stanfordcorenlp.Tokenizer.java
/** * Wrapper around Stanford CoreNLP to tokenize text. * * Give it an input dir of text files with --input-dir and it'll ouput * tokenized versions, one sentence per line with space separated words to * --output-dir (defaults to out/).//from w ww. j av a2s .c om * * @param args CLI args. Example: --input-dir my-input --output-dir * my-output. */ public static void main(String[] args) { ArgumentParser parser = ArgumentParsers.newArgumentParser("stanford-corenlp-tokenizer-wrapper") .description("Converts Mediawiki dumps to text."); parser.addArgument("-i", "--input-dir").required(true).help("Path of the input text files directory."); parser.addArgument("-o", "--output-dir").help("Path of the output text files directory.").setDefault("out"); Params params = new Params(); try { parser.parseArgs(args, params); } catch (ArgumentParserException ex) { System.err.println("Could not parse arguments: " + ex.getMessage()); System.exit(1); } Tokenizer tokenizer = new Tokenizer(); try { Files.list(Paths.get(params.inDirPath)).filter(Files::isRegularFile).map(Path::toFile).map(f -> { try { return Pair.of(f.getName(), FileUtils.readFileToString(f, StandardCharsets.UTF_8)); } catch (IOException ex) { System.err.println("Could not read input text file: " + ex.getLocalizedMessage()); throw new UncheckedIOException(ex); } }).forEach(p -> { String text = tokenizer.tokenizeAndSentenceSplit(p.getRight()); try { FileUtils.writeStringToFile(Paths.get(params.outDirpath, p.getLeft()).toFile(), text, StandardCharsets.UTF_8); } catch (IOException ex) { System.err.println("Could not write output text file: " + ex.getLocalizedMessage()); } }); } catch (IOException ex) { System.err.println("Could not read from input directory: " + ex.getLocalizedMessage()); } }
From source file:com.twentyn.patentSearch.DocumentSearch.java
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();/*from ww w. j a v a 2s. c o m*/ Options opts = new Options(); opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read") .build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build()); opts.addOption( Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build()); opts.addOption(Option.builder("l").longOpt("list-file").hasArg() .desc("A file containing a list of queries to run in sequence").build()); opts.addOption( Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build()); opts.addOption(Option.builder("d").longOpt("dump").hasArg() .desc("Dump terms in the document index for a specified field").build()); opts.addOption( Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build()); opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg() .desc("The index of the InChI field if an input TSV is specified.").build()); opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg() .desc("The index of the chemical synonym field if an input TSV is specified.").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field") && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) { System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}"); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY); LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); IndexReader indexReader = DirectoryReader.open(indexDir);) { if (cmdLine.hasOption("enumerate")) { /* Enumerate all documents in the index. * With help from * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */ for (int i = 0; i < indexReader.maxDoc(); i++) { Document doc = indexReader.document(i); LOGGER.info("Doc " + i + ":"); LOGGER.info(doc); } } else if (cmdLine.hasOption("dump")) { /* Dump indexed terms for a specific field. * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */ Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump")); LOGGER.info("Has positions: " + terms.hasPositions()); LOGGER.info("Has offsets: " + terms.hasOffsets()); LOGGER.info("Has freqs: " + terms.hasFreqs()); LOGGER.info("Stats: " + terms.getStats()); LOGGER.info(terms); TermsEnum termsEnum = terms.iterator(); BytesRef br = null; while ((br = termsEnum.next()) != null) { LOGGER.info(" " + br.utf8ToString()); } } else { IndexSearcher searcher = new IndexSearcher(indexReader); String field = cmdLine.getOptionValue("field"); List<Pair<String, String>> queries = null; if (cmdLine.hasOption("query")) { queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query"))); } else if (cmdLine.hasOption("list-file")) { if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) { LOGGER.error("Must specify both inchi-field and synonym-field when using list-file."); System.exit(1); } Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field")); Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field")); queries = new LinkedList<>(); BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file"))); String line; while ((line = r.readLine()) != null) { line = line.trim(); if (!line.isEmpty()) { // TODO: use a proper TSV reader; this is intentionally terrible as is. String[] fields = line.split("\t"); queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField])); } } r.close(); } if (queries == null || queries.size() == 0) { LOGGER.error("Found no queries to run."); return; } List<SearchResult> searchResults = new ArrayList<>(queries.size()); for (Pair<String, String> queryPair : queries) { String inchi = queryPair.getLeft(); String rawQueryString = queryPair.getRight(); /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities * as query directives, which is not what we want at all. Phrase queries seem to work adequately * with the analyzer we're currently using. */ String queryString = rawQueryString.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); for (String p : parts) { query.add(new Term(field, p)); } LOGGER.info("Running query: " + query.toString()); BooleanQuery bq = new BooleanQuery(); bq.add(query, BooleanClause.Occur.MUST); bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD); LOGGER.info(" Full query: " + bq.toString()); TopDocs topDocs = searcher.search(bq, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs.length == 0) { LOGGER.info("Search returned no results."); } List<ResultDocument> results = new ArrayList<>(scoreDocs.length); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; Document doc = indexReader.document(scoreDoc.doc); LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": " + doc.get("id") + ", " + doc.get("title")); results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"), doc.get("id"), null)); } LOGGER.info("----- Done with query " + query.toString()); // TODO: reduce memory usage when not writing results to an output file. searchResults.add(new SearchResult(inchi, rawQueryString, bq, results)); } if (cmdLine.hasOption("output")) { try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) { writer.write(objectMapper.writeValueAsString(searchResults)); } } } } }
From source file:com.act.lcms.db.io.PrintConstructInfo.java
public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());//from w w w .j av a 2 s . c om } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY)); if (!lcmsDir.isDirectory()) { System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } try (DB db = DB.openDBFromCLI(cl)) { System.out.print("Loading/updating LCMS scan files into DB\n"); ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir); String construct = cl.getOptionValue(OPTION_CONSTRUCT); List<LCMSWell> lcmsWells = LCMSWell.getInstance().getByConstructID(db, construct); Collections.sort(lcmsWells, new Comparator<LCMSWell>() { @Override public int compare(LCMSWell o1, LCMSWell o2) { return o1.getId().compareTo(o2.getId()); } }); Set<String> uniqueMSIDs = new HashSet<>(); Map<Integer, Plate> platesById = new HashMap<>(); System.out.format("\n\n-- Construct %s --\n\n", construct); List<ChemicalAssociatedWithPathway> pathwayChems = ChemicalAssociatedWithPathway.getInstance() .getChemicalsAssociatedWithPathwayByConstructId(db, construct); System.out.print("Chemicals associated with pathway:\n"); System.out.format(" %-8s%-15s%-45s\n", "index", "kind", "chemical"); for (ChemicalAssociatedWithPathway chem : pathwayChems) { System.out.format(" %-8d%-15s%-45s\n", chem.getIndex(), chem.getKind(), chem.getChemical()); } System.out.print("\nLCMS wells:\n"); System.out.format(" %-15s%-6s%-15s%-15s%-15s\n", "barcode", "well", "msid", "fed", "lcms_count"); for (LCMSWell well : lcmsWells) { uniqueMSIDs.add(well.getMsid()); Plate p = platesById.get(well.getPlateId()); if (p == null) { // TODO: migrate Plate to be a subclass of BaseDBModel. p = Plate.getPlateById(db, well.getPlateId()); platesById.put(p.getId(), p); } String chem = well.getChemical(); List<ScanFile> scanFiles = ScanFile.getScanFileByPlateIDRowAndColumn(db, p.getId(), well.getPlateRow(), well.getPlateColumn()); System.out.format(" %-15s%-6s%-15s%-15s%-15d\n", p.getBarcode(), well.getCoordinatesString(), well.getMsid(), chem == null || chem.isEmpty() ? "--" : chem, scanFiles.size()); System.out.flush(); } List<Integer> plateIds = Arrays.asList(platesById.keySet().toArray(new Integer[platesById.size()])); Collections.sort(plateIds); System.out.print("\nAppears in plates:\n"); for (Integer id : plateIds) { Plate p = platesById.get(id); System.out.format(" %s: %s\n", p.getBarcode(), p.getName()); } List<String> msids = Arrays.asList(uniqueMSIDs.toArray(new String[uniqueMSIDs.size()])); Collections.sort(msids); System.out.format("\nMSIDS: %s\n", StringUtils.join(msids, ", ")); Set<String> availableNegativeControls = new HashSet<>(); for (Map.Entry<Integer, Plate> entry : platesById.entrySet()) { List<LCMSWell> wells = LCMSWell.getInstance().getByPlateId(db, entry.getKey()); for (LCMSWell well : wells) { if (!construct.equals(well.getComposition())) { availableNegativeControls.add(well.getComposition()); } } } // Print available standards for each step w/ plate barcodes and coordinates. System.out.format("\nAvailable Standards:\n"); Map<Integer, Plate> plateCache = new HashMap<>(); for (ChemicalAssociatedWithPathway chem : pathwayChems) { List<StandardWell> matchingWells = StandardWell.getInstance().getStandardWellsByChemical(db, chem.getChemical()); for (StandardWell well : matchingWells) { if (!plateCache.containsKey(well.getPlateId())) { Plate p = Plate.getPlateById(db, well.getPlateId()); plateCache.put(p.getId(), p); } } Map<Integer, List<StandardWell>> standardWellsByPlateId = new HashMap<>(); for (StandardWell well : matchingWells) { List<StandardWell> plateWells = standardWellsByPlateId.get(well.getPlateId()); if (plateWells == null) { plateWells = new ArrayList<>(); standardWellsByPlateId.put(well.getPlateId(), plateWells); } plateWells.add(well); } List<Pair<String, Integer>> plateBarcodes = new ArrayList<>(plateCache.size()); for (Plate p : plateCache.values()) { if (p.getBarcode() == null) { plateBarcodes.add(Pair.of("(no barcode)", p.getId())); } else { plateBarcodes.add(Pair.of(p.getBarcode(), p.getId())); } } Collections.sort(plateBarcodes); System.out.format(" %s:\n", chem.getChemical()); for (Pair<String, Integer> barcodePair : plateBarcodes) { // TODO: hoist this whole sorting/translation step into a utility class. List<StandardWell> wells = standardWellsByPlateId.get(barcodePair.getRight()); if (wells == null) { // Don't print plates that don't apply to this chemical, which can happen because we're caching the plates. continue; } Collections.sort(wells, new Comparator<StandardWell>() { @Override public int compare(StandardWell o1, StandardWell o2) { int c = o1.getPlateRow().compareTo(o2.getPlateRow()); if (c != 0) return c; return o1.getPlateColumn().compareTo(o2.getPlateColumn()); } }); List<String> descriptions = new ArrayList<>(wells.size()); for (StandardWell well : wells) { descriptions.add(String.format("%s in %s%s", well.getCoordinatesString(), well.getMedia(), well.getConcentration() == null ? "" : String.format(" c. %f", well.getConcentration()))); } System.out.format(" %s: %s\n", barcodePair.getLeft(), StringUtils.join(descriptions, ", ")); } } List<String> negativeControlStrains = Arrays .asList(availableNegativeControls.toArray(new String[availableNegativeControls.size()])); Collections.sort(negativeControlStrains); System.out.format("\nAvailable negative controls: %s\n", StringUtils.join(negativeControlStrains, ",")); System.out.print("\n----------\n"); System.out.print("\n\n"); } }
From source file:blusunrize.immersiveengineering.api.tool.RailgunHandler.java
public static RailgunProjectileProperties registerProjectileProperties(IngredientStack stack, double damage, double gravity) { RailgunProjectileProperties properties = new RailgunProjectileProperties(damage, gravity); projectilePropertyMap.add(Pair.of(stack, properties)); return properties; }
From source file:edu.umd.umiacs.clip.tools.math.Formatter.java
public static Pair<Double, Double> format(Pair<Double, Double> pair) { return Pair.of(format(pair.getLeft()), format(pair.getRight())); }
From source file:de.pixida.logtest.logreaders.PatternMatchingsStripper.java
public static String strip(final Matcher matcher, final String value) { if (matcher.groupCount() == 0) { return value; }//from w w w. jav a2 s . c o m // Remove optional matches which were empty and filter nested matches final List<Pair<Integer, Integer>> realMatches = new ArrayList<>(); int lastMostOuterMatchEnd = -1; for (int i = 1; i <= matcher.groupCount(); i++) { if (matcher.start(i) != -1) { if (matcher.end(i) <= lastMostOuterMatchEnd) { continue; } lastMostOuterMatchEnd = matcher.end(i); realMatches.add(Pair.of(matcher.start(i), matcher.end(i))); } } if (realMatches.isEmpty()) { return value; } // Removal final StringBuilder sb = new StringBuilder(value.substring(0, realMatches.get(0).getLeft())); for (int i = 0; i < realMatches.size(); i++) { // Assumption: Substring before start of match was already appended if (i + 1 < realMatches.size()) { sb.append(value.substring(realMatches.get(i).getRight(), realMatches.get(i + 1).getLeft())); } else { sb.append(value.substring(realMatches.get(i).getRight())); } } return sb.toString(); }
From source file:alfio.controller.api.admin.SerializablePair.java
static <L, R> SerializablePair<L, R> of(L left, R right) { return new SerializablePair<>(Pair.of(left, right)); }
From source file:edu.washington.gs.skyline.model.quantification.PValues.java
public static double[] adjustPValues(double[] pValues) { List<Pair<Double, Integer>> entries = new ArrayList<>(); DoubleStream.of(pValues).forEach(value -> entries.add(Pair.of(value, entries.size()))); Collections.sort(entries);//w w w. j av a 2 s . co m double currentMin = 1.0; double[] result = new double[entries.size()]; for (int i = entries.size() - 1; i >= 0; i--) { double value = entries.get(i).getLeft() * entries.size() / (i + 1); currentMin = Math.min(value, currentMin); result[entries.get(i).getRight()] = currentMin; } return result; }
From source file:mase.mason.world.GeomUtils.java
public static Pair<Double2D, Double2D> computeBB(Double2D[] points) { double xMin = Double.POSITIVE_INFINITY; double yMin = Double.POSITIVE_INFINITY; double xMax = Double.NEGATIVE_INFINITY; double yMax = Double.NEGATIVE_INFINITY; for (Double2D p : points) { xMin = Math.min(xMin, p.x); yMin = Math.min(yMin, p.y); xMax = Math.max(xMax, p.x); yMax = Math.max(yMax, p.y); }// w ww. j a va 2 s . c om return Pair.of(new Double2D(xMin, yMin), new Double2D(xMax, yMax)); }
From source file:com.twitter.distributedlog.service.ClientUtils.java
public static Pair<DistributedLogClient, MonitorServiceClient> buildClient( DistributedLogClientBuilder builder) { DistributedLogClientImpl clientImpl = builder.buildClient(); return Pair.of((DistributedLogClient) clientImpl, (MonitorServiceClient) clientImpl); }