List of usage examples for com.google.common.base.Joiner.join
@CheckReturnValue public final String join(Object[] parts)
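Before the source-file examples below, here is a minimal, self-contained sketch of this overload (the class name JoinerJoinDemo and the sample values are made up for illustration). Joiner.on(...) fixes the separator; join(Object[] parts) converts each element with toString() and concatenates the results.

import com.google.common.base.Joiner;

public class JoinerJoinDemo {
  public static void main(String[] args) {
    // join(Object[] parts) calls toString() on each element and
    // joins the results with the configured separator.
    Object[] parts = { "alpha", 42, "gamma" };
    System.out.println(Joiner.on(", ").join(parts)); // alpha, 42, gamma

    // Without skipNulls() or useForNull(), a null element throws NullPointerException.
    Object[] withNull = { "alpha", null, "gamma" };
    System.out.println(Joiner.on(", ").useForNull("n/a").join(withNull)); // alpha, n/a, gamma
  }
}

Most of the examples below join an Iterable (e.g. a token list) rather than an array; Joiner provides the same behavior for both.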
From source file: pl.coffeepower.blog.examples.LambdaExpExample.java

public static void main(String[] args) {
    LambdaExpExample example = new LambdaExpExample();
    Joiner joiner = Joiner.on(", ");
    log.info("numbers: " + joiner.join(example.getNumbers()));
    log.info("odd numbers: " + NumberUtils.getOddNumbers(example.getNumbers()));
    log.info("even numbers: " + NumberUtils.getEvenNumbers(example.getNumbers()));
}
From source file: apps.Source2XML.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output file");
    options.addOption("reparse_xml", null, false, "reparse each XML entry to ensure the parser doesn't fail");

    Joiner commaJoin = Joiner.on(',');
    options.addOption("source_type", null, true,
        "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    Joiner spaceJoin = Joiner.on(' ');

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter outputFile = null;

    int docNum = 0;

    if (USE_LEMMATIZER && USE_STEMMER) {
        System.err.println("Bug/inconsistent code: can't use the stemmer and lemmatizer at the same time!");
        System.exit(1);
    }

    //Stemmer stemmer = new Stemmer();
    KrovetzStemmer stemmer = new KrovetzStemmer();

    System.out.println("Using Stanford NLP? " + USE_STANFORD);
    System.out.println("Using Stanford lemmatizer? " + USE_LEMMATIZER);
    System.out.println("Using stemmer? " + USE_STEMMER
        + (USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : ""));

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'output file'", options);
        }

        outputFile = new BufferedWriter(
            new OutputStreamWriter(CompressUtils.createOutputStream(outputFileName)));

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        boolean reparseXML = cmd.hasOption("reparse_xml");

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(
            new DictNoComments(new File("data/stopwords.txt"), true /* lower case */),
            USE_STANFORD, USE_LEMMATIZER);

        Map<String, String> outputMap = new HashMap<String, String>();

        outputMap.put(UtilConst.XML_FIELD_DOCNO, null);
        outputMap.put(UtilConst.XML_FIELD_TEXT, null);

        XmlHelper xmlHlp = new XmlHelper();

        if (reparseXML)
            System.out.println("Will reparse every XML entry to verify correctness!");

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            ArrayList<String> toks = textCleaner.cleanUp(inpDoc.mDocText);
            ArrayList<String> goodToks = new ArrayList<String>();
            for (String s : toks)
                if (s.length() <= MAX_WORD_LEN && // Exclude long and short words
                    s.length() >= MIN_WORD_LEN && isGoodWord(s))
                    goodToks.add(USE_STEMMER ? stemmer.stem(s) : s);

            String partlyCleanedText = spaceJoin.join(goodToks);
            String cleanText = XmlHelper.removeInvaildXMLChars(partlyCleanedText);
            // isGoodWord combined with the Stanford tokenizer should be quite restrictive already
            //cleanText = replaceSomePunct(cleanText);

            outputMap.replace(UtilConst.XML_FIELD_DOCNO, inpDoc.mDocId);
            outputMap.replace(UtilConst.XML_FIELD_TEXT, cleanText);

            String xml = xmlHlp.genXMLIndexEntry(outputMap);

            if (reparseXML) {
                try {
                    XmlHelper.parseDocWithoutXMLDecl(xml);
                } catch (Exception e) {
                    System.err.println("Error re-parsing xml for document ID: " + inpDoc.mDocId);
                    System.exit(1);
                }
            }

            /*
            {
                System.out.println(inpDoc.mDocId);
                System.out.println("=====================");
                System.out.println(partlyCleanedText);
                System.out.println("=====================");
                System.out.println(cleanText);
            }
            */

            try {
                outputFile.write(xml);
                outputFile.write(NL);
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("Error processing/saving a document!");
            }

            if (docNum % 1000 == 0)
                System.out.println(String.format("Processed %d documents", docNum));
        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Processed %d documents", docNum));
        try {
            if (null != outputFile) {
                outputFile.close();
                System.out.println("Output file is closed! all seems to be fine...");
            }
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}
From source file: apps.LuceneIndexer.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output directory");
    options.addOption("r", null, true, "optional output TREC-format QREL file");

    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    options.addOption("source_type", null, true,
        "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    // If you increase this value, you may need to modify the following line in *.sh file
    // export MAVEN_OPTS="-Xms8192m -server"
    double ramBufferSizeMB = 1024 * 8; // 8 GB

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    IndexWriter indexWriter = null;
    BufferedWriter qrelWriter = null;

    int docNum = 0;

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputDirName = null, qrelFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputDirName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'index directory'", options);
        }

        if (cmd.hasOption("r")) {
            qrelFileName = cmd.getOptionValue("r");
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        if (qrelFileName != null)
            qrelWriter = new BufferedWriter(new FileWriter(qrelFileName));

        File outputDir = new File(outputDirName);

        if (!outputDir.exists()) {
            if (!outputDir.mkdirs()) {
                System.out.println("couldn't create " + outputDir.getAbsolutePath());
                System.exit(1);
            }
        }

        if (!outputDir.isDirectory()) {
            System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
            System.exit(1);
        }

        if (!outputDir.canWrite()) {
            System.out.println("Can't write to " + outputDir.getAbsolutePath());
            System.exit(1);
        }

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName));
        IndexWriterConfig indexConf = new IndexWriterConfig(analyzer);

        /*
         * OpenMode.CREATE creates a new index or overwrites an existing one.
         * https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE
         */
        indexConf.setOpenMode(OpenMode.CREATE);
        indexConf.setRAMBufferSizeMB(ramBufferSizeMB);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b));
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b));
        }

        indexWriter = new IndexWriter(indexDir, indexConf);

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(null);

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            Document luceneDoc = new Document();
            ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText);
            String cleanText = spaceJoin.join(cleanedToks);

            // System.out.println(inpDoc.mDocId);
            // System.out.println(cleanText);
            // System.out.println("==============================");

            luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES));
            luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES));
            indexWriter.addDocument(luceneDoc);

            if (inpDoc.mIsRel != null && qrelWriter != null) {
                saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0);
            }

            if (docNum % 1000 == 0)
                System.out.println(String.format("Indexed %d documents", docNum));
        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Indexed %d documents", docNum));
        try {
            if (null != indexWriter)
                indexWriter.close();
            if (null != qrelWriter)
                qrelWriter.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}
From source file: com.mapr.PurchaseLog.java

public static void main(String[] args) throws IOException {
    Options opts = new Options();
    CmdLineParser parser = new CmdLineParser(opts);
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println("Usage: -count <number>G|M|K [ -users number ] log-file user-profiles");
        return;
    }

    Joiner withTab = Joiner.on("\t");

    // first generate lots of user definitions
    SchemaSampler users = new SchemaSampler(
        Resources.asCharSource(Resources.getResource("user-schema.txt"), Charsets.UTF_8).read());
    File userFile = File.createTempFile("user", "tsv");
    BufferedWriter out = Files.newBufferedWriter(userFile.toPath(), Charsets.UTF_8);
    for (int i = 0; i < opts.users; i++) {
        out.write(withTab.join(users.sample()));
        out.newLine();
    }
    out.close();

    // now generate a session for each user
    Splitter onTabs = Splitter.on("\t");
    Splitter onComma = Splitter.on(",");
    Random gen = new Random();

    SchemaSampler intermediate = new SchemaSampler(
        Resources.asCharSource(Resources.getResource("hit_step.txt"), Charsets.UTF_8).read());

    final int COUNTRY = users.getFieldNames().indexOf("country");
    final int CAMPAIGN = intermediate.getFieldNames().indexOf("campaign_list");
    final int SEARCH_TERMS = intermediate.getFieldNames().indexOf("search_keywords");
    Preconditions.checkState(COUNTRY >= 0, "Need country field in user schema");
    Preconditions.checkState(CAMPAIGN >= 0, "Need campaign_list field in step schema");
    Preconditions.checkState(SEARCH_TERMS >= 0, "Need search_keywords field in step schema");

    out = Files.newBufferedWriter(new File(opts.out).toPath(), Charsets.UTF_8);
    for (String line : Files.readAllLines(userFile.toPath(), Charsets.UTF_8)) {
        long t = (long) (TimeUnit.MILLISECONDS.convert(30, TimeUnit.DAYS) * gen.nextDouble());
        List<String> user = Lists.newArrayList(onTabs.split(line));

        // pick session length
        int n = (int) Math.floor(-30 * Math.log(gen.nextDouble()));
        for (int i = 0; i < n; i++) {
            // time on page
            int dt = (int) Math.floor(-20000 * Math.log(gen.nextDouble()));
            t += dt;

            // hit specific values
            JsonNode step = intermediate.sample();

            // check for purchase
            double p = 0.01;
            List<String> campaigns = Lists.newArrayList(onComma.split(step.get("campaign_list").asText()));
            List<String> keywords = Lists.newArrayList(onComma.split(step.get("search_keywords").asText()));
            if ((user.get(COUNTRY).equals("us") && campaigns.contains("5"))
                || (user.get(COUNTRY).equals("jp") && campaigns.contains("7"))
                || keywords.contains("homer") || keywords.contains("simpson")) {
                p = 0.5;
            }

            String events = gen.nextDouble() < p ? "1" : "-";

            out.write(Long.toString(t));
            out.write("\t");
            out.write(line);
            out.write("\t");
            out.write(withTab.join(step));
            out.write("\t");
            out.write(events);
            out.write("\n");
        }
    }
    out.close();
}
From source file: apps.LuceneQuery.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("d", null, true, "index directory");
    options.addOption("i", null, true, "input file");
    options.addOption("s", null, true, "stop word file");
    options.addOption("n", null, true, "max # of results");
    options.addOption("o", null, true, "a TREC-style output file");
    options.addOption("r", null, true, "an optional QREL file, if specified,"
        + "we save results only for queries for which we find at least one relevant entry.");

    options.addOption("prob", null, true, "question sampling probability");
    options.addOption("max_query_qty", null, true, "a maximum number of queries to run");
    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");
    options.addOption("seed", null, true, "random seed");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    options.addOption("source_type", null, true,
        "query source type: " + commaJoin.join(SourceFactory.getQuerySourceList()));

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    QrelReader qrels = null;

    try {
        CommandLine cmd = parser.parse(options, args);

        String indexDir = null;

        if (cmd.hasOption("d")) {
            indexDir = cmd.getOptionValue("d");
        } else {
            Usage("Specify 'index directory'", options);
        }

        String inputFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        DictNoComments stopWords = null;

        if (cmd.hasOption("s")) {
            String stopWordFileName = cmd.getOptionValue("s");
            stopWords = new DictNoComments(new File(stopWordFileName), true /* lowercasing */);
            System.out.println("Using the stopword file: " + stopWordFileName);
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        int numRet = 100;

        if (cmd.hasOption("n")) {
            numRet = Integer.parseInt(cmd.getOptionValue("n"));
            System.out.println("Retrieving at most " + numRet + " candidate entries.");
        }

        String trecOutFileName = null;

        if (cmd.hasOption("o")) {
            trecOutFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'a TREC-style output file'", options);
        }

        double fProb = 1.0f;

        if (cmd.hasOption("prob")) {
            try {
                fProb = Double.parseDouble(cmd.getOptionValue("prob"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'question sampling probability'", options);
            }
        }

        if (fProb <= 0 || fProb > 1) {
            Usage("Question sampling probability should be >0 and <=1", options);
        }

        System.out.println("Sample the following fraction of questions: " + fProb);

        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        long seed = 0;

        String tmpl = cmd.getOptionValue("seed");

        if (tmpl != null)
            seed = Long.parseLong(tmpl);

        System.out.println("Using seed: " + seed);

        Random randGen = new Random(seed);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        Similarity similarity = null;

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25SimilarityFix(bm25_k1, bm25_b);
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25Similarity(bm25_k1, bm25_b);
        }

        int maxQueryQty = Integer.MAX_VALUE;

        if (cmd.hasOption("max_query_qty")) {
            try {
                maxQueryQty = Integer.parseInt(cmd.getOptionValue("max_query_qty"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'max_query_qty'", options);
            }
        }

        System.out.println(String.format("Executing at most %d queries", maxQueryQty));

        if (cmd.hasOption("r")) {
            String qrelFile = cmd.getOptionValue("r");
            System.out.println("Using the qrel file: '" + qrelFile
                + "', queries not returning a relevant entry will be ignored.");
            qrels = new QrelReader(qrelFile);
        }

        System.out.println(String.format("Using indexing directory %s", indexDir));

        LuceneCandidateProvider candProvider = new LuceneCandidateProvider(indexDir, analyzer, similarity);
        TextCleaner textCleaner = new TextCleaner(stopWords);

        QuerySource inpQuerySource = SourceFactory.createQuerySource(sourceName, inputFileName);
        QueryEntry inpQuery = null;

        BufferedWriter trecOutFile = new BufferedWriter(new FileWriter(new File(trecOutFileName)));

        int questNum = 0, questQty = 0;
        long totalTimeMS = 0;

        while ((inpQuery = inpQuerySource.next()) != null) {
            if (questQty >= maxQueryQty)
                break;
            ++questNum;

            String queryID = inpQuery.mQueryId;

            if (randGen.nextDouble() <= fProb) {
                ++questQty;

                String tokQuery = spaceJoin.join(textCleaner.cleanUp(inpQuery.mQueryText));

                String query = TextCleaner.luceneSafeCleanUp(tokQuery).trim();

                ResEntry[] results = null;

                if (query.isEmpty()) {
                    results = new ResEntry[0];
                    System.out.println(String.format("WARNING, empty query id = '%s'", inpQuery.mQueryId));
                } else {
                    try {
                        long start = System.currentTimeMillis();
                        results = candProvider.getCandidates(questNum, query, numRet);
                        long end = System.currentTimeMillis();
                        long searchTimeMS = end - start;
                        totalTimeMS += searchTimeMS;

                        System.out.println(String.format(
                            "Obtained results for the query # %d (answered %d queries), queryID %s the search took %d ms, we asked for max %d entries got %d",
                            questNum, questQty, queryID, searchTimeMS, numRet, results.length));

                    } catch (ParseException e) {
                        e.printStackTrace();
                        System.err.println(
                            "Error parsing query: " + query + " orig question is :" + inpQuery.mQueryText);
                        System.exit(1);
                    }
                }

                boolean bSave = true;

                if (qrels != null) {
                    boolean bOk = false;
                    for (ResEntry r : results) {
                        String label = qrels.get(queryID, r.mDocId);
                        if (candProvider.isRelevLabel(label, 1)) {
                            bOk = true;
                            break;
                        }
                    }
                    if (!bOk)
                        bSave = false;
                }

                // System.out.println(String.format("Ranking results the query # %d queryId='%s' save results? %b",
                //     questNum, queryID, bSave));

                if (bSave) {
                    saveTrecResults(queryID, results, trecOutFile, TREC_RUN, numRet);
                }
            }

            if (questNum % 1000 == 0)
                System.out.println(String.format("Processed %d questions", questNum));
        }

        System.out.println(String.format("Processed %d questions, the search took %f MS on average", questQty,
            (float) totalTimeMS / questQty));

        trecOutFile.close();

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments: " + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}
From source file: io.druid.server.sql.SQLRunner.java

public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("h", "help", false, "help");
    options.addOption("v", false, "verbose");
    options.addOption("e", "host", true, "endpoint [hostname:port]");

    CommandLine cmd = new GnuParser().parse(options, args);

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SQLRunner", options);
        System.exit(2);
    }

    String hostname = cmd.getOptionValue("e", "localhost:8080");
    String sql = cmd.getArgs().length > 0 ? cmd.getArgs()[0] : STATEMENT;

    ObjectMapper objectMapper = new DefaultObjectMapper();
    ObjectWriter jsonWriter = objectMapper.writerWithDefaultPrettyPrinter();

    CharStream stream = new ANTLRInputStream(sql);
    DruidSQLLexer lexer = new DruidSQLLexer(stream);
    TokenStream tokenStream = new CommonTokenStream(lexer);
    DruidSQLParser parser = new DruidSQLParser(tokenStream);
    lexer.removeErrorListeners();
    parser.removeErrorListeners();

    lexer.addErrorListener(ConsoleErrorListener.INSTANCE);
    parser.addErrorListener(ConsoleErrorListener.INSTANCE);

    try {
        DruidSQLParser.QueryContext queryContext = parser.query();
        if (parser.getNumberOfSyntaxErrors() > 0)
            throw new IllegalStateException();
        // parser.setBuildParseTree(true);
        // System.err.println(q.toStringTree(parser));
    } catch (Exception e) {
        String msg = e.getMessage();
        if (msg != null)
            System.err.println(e);
        System.exit(1);
    }

    final Query query;
    final TypeReference typeRef;
    boolean groupBy = false;

    if (parser.groupByDimensions.isEmpty()) {
        query = Druids.newTimeseriesQueryBuilder().dataSource(parser.getDataSource())
            .aggregators(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
            .postAggregators(parser.postAggregators).intervals(parser.intervals)
            .granularity(parser.granularity).filters(parser.filter).build();

        typeRef = new TypeReference<List<Result<TimeseriesResultValue>>>() {
        };
    } else {
        query = GroupByQuery.builder().setDataSource(parser.getDataSource())
            .setAggregatorSpecs(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
            .setPostAggregatorSpecs(parser.postAggregators).setInterval(parser.intervals)
            .setGranularity(parser.granularity).setDimFilter(parser.filter)
            .setDimensions(new ArrayList<DimensionSpec>(parser.groupByDimensions.values())).build();

        typeRef = new TypeReference<List<Row>>() {
        };
        groupBy = true;
    }

    String queryStr = jsonWriter.writeValueAsString(query);
    if (cmd.hasOption("v"))
        System.err.println(queryStr);

    URL url = new URL(String.format("http://%s/druid/v2/?pretty", hostname));
    final URLConnection urlConnection = url.openConnection();
    urlConnection.addRequestProperty("content-type", MediaType.APPLICATION_JSON);
    urlConnection.getOutputStream().write(StringUtils.toUtf8(queryStr));
    BufferedReader stdInput = new BufferedReader(
        new InputStreamReader(urlConnection.getInputStream(), Charsets.UTF_8));

    Object res = objectMapper.readValue(stdInput, typeRef);

    Joiner tabJoiner = Joiner.on("\t");

    if (groupBy) {
        List<Row> rows = (List<Row>) res;
        Iterable<String> dimensions = Iterables.transform(parser.groupByDimensions.values(),
            new Function<DimensionSpec, String>() {
                @Override
                public String apply(@Nullable DimensionSpec input) {
                    return input.getOutputName();
                }
            });

        System.out.println(
            tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), dimensions, parser.fields)));
        for (final Row r : rows) {
            System.out.println(tabJoiner.join(Iterables.concat(
                Lists.newArrayList(parser.granularity.toDateTime(r.getTimestampFromEpoch())),
                Iterables.transform(parser.groupByDimensions.values(), new Function<DimensionSpec, String>() {
                    @Override
                    public String apply(@Nullable DimensionSpec input) {
                        return Joiner.on(",").join(r.getDimension(input.getOutputName()));
                    }
                }), Iterables.transform(parser.fields, new Function<String, Object>() {
                    @Override
                    public Object apply(@Nullable String input) {
                        return r.getFloatMetric(input);
                    }
                }))));
        }
    } else {
        List<Result<TimeseriesResultValue>> rows = (List<Result<TimeseriesResultValue>>) res;
        System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), parser.fields)));
        for (final Result<TimeseriesResultValue> r : rows) {
            System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList(r.getTimestamp()),
                Lists.transform(parser.fields, new Function<String, Object>() {
                    @Override
                    public Object apply(@Nullable String input) {
                        return r.getValue().getMetric(input);
                    }
                }))));
        }
    }
    CloseQuietly.close(stdInput);
}
From source file: org.opensha.nshmp2.erf.source.PointSource13b.java

public static void main(String[] args) {
    System.out.println(NSHMP_Util.getMeanRJB(6.05, 1.0));

    // double dist = 6.5;
    // double xmag = 6.05;
    //
    // double dr_rjb = 1.0; // historic context; could be dropped
    // double dm_rjb = 0.1;
    // double xmmin_rjb = 6.05;
    //
    // int irjb = (int) (dist/dr_rjb+1);
    //
    // int m_ind = 1 + Math.max(0,(int) Math.rint((xmag-xmmin_rjb)/dm_rjb));
    // m_ind= Math.min(26,m_ind);
    // System.out.println("m_ind: " + m_ind);
    // System.out.println("irjb: " + irjb);
    //
    // System.out.println("====");
    // double mCorr = Math.round(xmag/0.05)*0.05;
    // double r = NSHMP_Util.getMeanRJB(mCorr, dist);
    // System.out.println(r);

    double Mw = 7.45;
    SingleMagFreqDist mfd = new SingleMagFreqDist(Mw, 1, 0.1, Mw, 1);
    Location srcLoc = new Location(31.6, -117.1);
    Location siteLoc = new Location(31.6, -117.105);
    double[] depths = new double[] { 5.0, 1.0 };
    Map<FocalMech, Double> mechMap = Maps.newHashMap();
    mechMap.put(FocalMech.STRIKE_SLIP, 0.0);
    mechMap.put(FocalMech.REVERSE, 0.0);
    mechMap.put(FocalMech.NORMAL, 1.0);

    PointSource13b ptSrc = new PointSource13b(srcLoc, mfd, 1.0, depths, mechMap);

    Joiner J = Joiner.on(" ");
    for (ProbEqkRupture rup : ptSrc) {
        PointSurface13b surf = (PointSurface13b) rup.getRuptureSurface();
        List<Double> attr = Lists.newArrayList(rup.getMag(), rup.getAveRake(), surf.getAveDip(), surf.zTop,
            surf.zBot, surf.widthH, surf.widthDD, surf.getDistanceJB(siteLoc), surf.getDistanceRup(siteLoc),
            surf.getDistanceX(siteLoc));
        System.out.println(J.join(attr) + " " + surf.footwall);
    }
}
From source file: apps.QueryReaderFactory.java

public static void main(String[] argv) {
    if (argv.length != 3) {
        System.err.println("Usage: <input file> <input type> <output file>");
    }

    BufferedWriter outputFile = null;

    try {
        QueryReader qr = createReader(argv[0], argv[1]);

        outputFile = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(argv[2])));

        XmlHelper xmlHlp = new XmlHelper();

        TextCleaner textCleaner = new TextCleaner(
            new DictNoComments(new File("data/stopwords.txt"), true /* lower case */),
            Source2XML.USE_STANFORD, Source2XML.USE_LEMMATIZER);

        Joiner spaceJoin = Joiner.on(' ');

        //Stemmer stemmer = new Stemmer();
        KrovetzStemmer stemmer = new KrovetzStemmer();

        System.out.println("Using Stanford NLP? " + Source2XML.USE_STANFORD);
        System.out.println("Using Stanford lemmatizer? " + Source2XML.USE_LEMMATIZER);
        System.out.println("Using stemmer? " + Source2XML.USE_STEMMER
            + (Source2XML.USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : ""));

        for (int qid = 0; qid < qr.getQueryQty(); ++qid) {
            Map<String, String> outputMap = new HashMap<String, String>();

            ArrayList<String> toks = textCleaner.cleanUp(qr.getQuery(qid));

            outputMap.put(UtilConst.XML_FIELD_DOCNO, qr.getQueryId(qid));
            outputMap.put(UtilConst.XML_FIELD_TEXT, spaceJoin.join(toks));

            String xml = xmlHlp.genXMLIndexEntry(outputMap);

            try {
                outputFile.write(xml);
                outputFile.write(NL);
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("Error processing/saving a document!");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminated due to exception: " + e);
        System.exit(1);
    } finally {
        try {
            if (null != outputFile)
                outputFile.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}
From source file: umd.twittertools.download.DataForHua.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("string").hasArg().withDescription("host").create(HOST_OPTION));
    options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
        .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(
        OptionBuilder.withArgName("string").hasArg().withDescription("qrels file").create(QRELS_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
        .create(NUM_RESULTS_OPTION));
    options.addOption(
        OptionBuilder.withArgName("string").hasArg().withDescription("group id").create(GROUP_OPTION));
    options.addOption(
        OptionBuilder.withArgName("string").hasArg().withDescription("access token").create(TOKEN_OPTION));
    options.addOption(
        OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(HOST_OPTION) || !cmdline.hasOption(PORT_OPTION)
        || !cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(QRELS_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(DataForHua.class.getName(), options);
        System.exit(-1);
    }

    String queryFile = cmdline.getOptionValue(QUERIES_OPTION);
    if (!new File(queryFile).exists()) {
        System.err.println("Error: " + queryFile + " doesn't exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    TrecTopicSet topicsFile = TrecTopicSet.fromFile(new File(queryFile));

    int numResults = 10000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String group = cmdline.hasOption(GROUP_OPTION) ? cmdline.getOptionValue(GROUP_OPTION) : null;
    String token = cmdline.hasOption(TOKEN_OPTION) ? cmdline.getOptionValue(TOKEN_OPTION) : null;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    TrecSearchThriftClient client = new TrecSearchThriftClient(cmdline.getOptionValue(HOST_OPTION),
        Integer.parseInt(cmdline.getOptionValue(PORT_OPTION)), group, token);

    TweetAnalyzer tokenizer = new TweetAnalyzer(Version.LUCENE_43, false); // no stemming
    Joiner joiner = Joiner.on(' ');
    String qrelsFile = cmdline.getOptionValue(QRELS_OPTION);
    Table<Integer, Long, Integer> groundTruth = RunTemporalModel.loadGroundTruth(qrelsFile);

    for (cc.twittertools.search.TrecTopic query : topicsFile) {
        List<TResult> results = client.search(query.getQuery(), query.getQueryTweetTime(), numResults);
        int i = 1;
        Set<Long> tweetIds = new HashSet<Long>();
        for (TResult result : results) {
            if (!tweetIds.contains(result.id)) {
                // The TREC official qrels don't have the "MB" prefix and trailing zeros, so we perform
                // this transformation so that trec_eval doesn't complain.
                tweetIds.add(result.id);
                Integer qid = Integer.parseInt(query.getId().replaceFirst("^MB0*", ""));
                if (groundTruth.contains(qid, result.id)) {
                    String qtext = joiner.join(LuceneTokenizer
                        .tokenize(tokenizer.tokenStream("text", new StringReader(query.getQuery()))));
                    String tweetText = joiner.join(LuceneTokenizer
                        .tokenize(tokenizer.tokenStream("text", new StringReader(result.text))));
                    int label = groundTruth.get(qid, result.id);
                    out.println(String.format("%d@%d@%d@%f@%s@%s", qid, result.id, label, result.rsv, qtext,
                        tweetText));
                }
                i++;
            }
        }
    }
    out.close();
}
From source file: edu.cmu.lti.oaqa.bio.index.medline.annotated.query.SimpleQueryApp.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("u", null, true, "Solr URI");
    options.addOption("n", null, true, "Max # of results");

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cmd = parser.parse(options, args);

        String solrURI = null;

        solrURI = cmd.getOptionValue("u");
        if (solrURI == null) {
            Usage("Specify Solr URI");
        }

        SolrServerWrapper solr = new SolrServerWrapper(solrURI);

        int numRet = 10;

        if (cmd.hasOption("n")) {
            numRet = Integer.parseInt(cmd.getOptionValue("n"));
        }

        List<String> fieldList = new ArrayList<String>();
        fieldList.add(UtilConstMedline.ID_FIELD);
        fieldList.add(UtilConstMedline.SCORE_FIELD);
        fieldList.add(UtilConstMedline.ARTICLE_TITLE_FIELD);
        fieldList.add(UtilConstMedline.ENTITIES_DESC_FIELD);
        fieldList.add(UtilConstMedline.ABSTRACT_TEXT_FIELD);

        BufferedReader sysInReader = new BufferedReader(new InputStreamReader(System.in));
        Joiner commaJoiner = Joiner.on(',');

        while (true) {
            System.out.println("Input query: ");
            String query = sysInReader.readLine();
            if (null == query)
                break;

            QueryTransformer qt = new QueryTransformer(query);

            String tranQuery = qt.getQuery();

            System.out.println("Translated query:");
            System.out.println(tranQuery);
            System.out.println("=========================");

            SolrDocumentList res = solr.runQuery(tranQuery, fieldList, numRet);

            System.out.println("Found " + res.getNumFound() + " entries");

            for (SolrDocument doc : res) {
                String id = (String) doc.getFieldValue(UtilConstMedline.ID_FIELD);
                float score = (Float) doc.getFieldValue(UtilConstMedline.SCORE_FIELD);
                String title = (String) doc.getFieldValue(UtilConstMedline.ARTICLE_TITLE_FIELD);
                String titleAbstract = (String) doc.getFieldValue(UtilConstMedline.ABSTRACT_TEXT_FIELD);

                System.out.println(score + " PMID=" + id + " " + titleAbstract);

                String entityDesc = (String) doc.getFieldValue(UtilConstMedline.ENTITIES_DESC_FIELD);

                System.out.println("Entities:");

                for (EntityEntry e : EntityEntry.parseEntityDesc(entityDesc)) {
                    System.out.println(String.format("[%d %d] concept=%s concept_ids=%s", e.mStart, e.mEnd,
                        e.mConcept, commaJoiner.join(e.mConceptIds)));
                }
            }
        }

        solr.close();

    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}