Example usage for com.google.common.base Joiner join

List of usage examples for com.google.common.base Joiner join

Introduction

In this page you can find the example usage for com.google.common.base Joiner join.

Prototype

@CheckReturnValue
public final String join(Object[] parts) 

Source Link

Document

Returns a string containing the string representation of each of parts, using the previously configured separator between each.

Usage

From source file:pl.coffeepower.blog.examples.LambdaExpExample.java

/**
 * Logs the example's numbers joined with {@code ", "}, followed by the results of
 * the odd-number and even-number helpers applied to the same numbers.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final LambdaExpExample demo = new LambdaExpExample();
    final Joiner commaSpace = Joiner.on(", ");

    // Joiner inserts the separator between consecutive elements only.
    final String allNumbers = commaSpace.join(demo.getNumbers());
    log.info("numbers: " + allNumbers);

    log.info("odd numbers: "
            + NumberUtils.getOddNumbers(demo.getNumbers()));
    log.info("even numbers: "
            + NumberUtils.getEvenNumbers(demo.getNumbers()));
}

From source file:apps.Source2XML.java

/**
 * Command-line entry point: reads documents from a document source, cleans and
 * filters their tokens, and writes one XML index entry per document to the output file.
 *
 * <p>Options: {@code -i} input file, {@code -o} output file, {@code -source_type}
 * document source type, and the flag {@code -reparse_xml}, which re-parses every
 * generated entry and terminates on the first entry that fails to parse.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output file");
    options.addOption("reparse_xml", null, false, "reparse each XML entry to ensure the parser doesn't fail");

    Joiner commaJoin = Joiner.on(',');

    options.addOption("source_type", null, true,
            "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    Joiner spaceJoin = Joiner.on(' ');

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter outputFile = null;

    int docNum = 0;

    // Stemming and lemmatization are mutually exclusive compile-time switches.
    if (USE_LEMMATIZER && USE_STEMMER) {
        System.err.println("Bug/inconsistent code: cann't use the stemmer and lemmatizer at the same time!");
        System.exit(1);
    }

    //Stemmer stemmer = new Stemmer();
    KrovetzStemmer stemmer = new KrovetzStemmer();

    System.out.println("Using Stanford NLP?        " + USE_STANFORD);
    System.out.println("Using Stanford lemmatizer? " + USE_LEMMATIZER);
    System.out.println("Using stemmer?             " + USE_STEMMER
            + (USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : ""));

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputFileName = null;

        // NOTE(review): Usage(...) presumably prints the help text and exits; confirm,
        // because the code below does not guard against null file names.
        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'output file'", options);
        }

        outputFile = new BufferedWriter(
                new OutputStreamWriter(CompressUtils.createOutputStream(outputFileName)));

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        // The flag must be looked up on the parsed command line. Options.hasOption()
        // only reports whether the option is *defined*, which is always true here,
        // so it would force re-parsing regardless of what the user passed.
        boolean reparseXML = cmd.hasOption("reparse_xml");

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(
                new DictNoComments(new File("data/stopwords.txt"), true /* lower case */), USE_STANFORD,
                USE_LEMMATIZER);

        // The same map is reused for every document; only the two values are replaced.
        Map<String, String> outputMap = new HashMap<String, String>();

        outputMap.put(UtilConst.XML_FIELD_DOCNO, null);
        outputMap.put(UtilConst.XML_FIELD_TEXT, null);

        XmlHelper xmlHlp = new XmlHelper();

        if (reparseXML)
            System.out.println("Will reparse every XML entry to verify correctness!");

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            ArrayList<String> toks = textCleaner.cleanUp(inpDoc.mDocText);
            ArrayList<String> goodToks = new ArrayList<String>();
            for (String s : toks)
                if (s.length() <= MAX_WORD_LEN && // Exclude long and short words
                        s.length() >= MIN_WORD_LEN && isGoodWord(s))
                    goodToks.add(USE_STEMMER ? stemmer.stem(s) : s);

            String partlyCleanedText = spaceJoin.join(goodToks);
            String cleanText = XmlHelper.removeInvaildXMLChars(partlyCleanedText);
            // isGoodWord combined with Stanford tokenizer should be quite restrictive already
            //cleanText = replaceSomePunct(cleanText);

            outputMap.replace(UtilConst.XML_FIELD_DOCNO, inpDoc.mDocId);
            outputMap.replace(UtilConst.XML_FIELD_TEXT, cleanText);

            String xml = xmlHlp.genXMLIndexEntry(outputMap);

            if (reparseXML) {
                try {
                    XmlHelper.parseDocWithoutXMLDecl(xml);
                } catch (Exception e) {
                    System.err.println("Error re-parsing xml for document ID: " + inpDoc.mDocId);
                    System.exit(1);
                }
            }

            /*
            {
              System.out.println(inpDoc.mDocId);
              System.out.println("=====================");
              System.out.println(partlyCleanedText);
              System.out.println("=====================");
              System.out.println(cleanText);
            } 
            */

            // A failed write is reported but does not stop processing of later documents.
            try {
                outputFile.write(xml);
                outputFile.write(NL);
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("Error processing/saving a document!");
            }

            if (docNum % 1000 == 0)
                System.out.println(String.format("Processed %d documents", docNum));
        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Processed %d documents", docNum));

        try {
            if (null != outputFile) {
                outputFile.close();
                System.out.println("Output file is closed! all seems to be fine...");
            }
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:apps.LuceneIndexer.java

/**
 * Command-line entry point: reads documents from a document source, cleans their text,
 * and adds each one to a newly created Lucene index; optionally writes QREL entries.
 *
 * <p>Options: {@code -i} input file, {@code -o} output (index) directory, {@code -r}
 * optional QREL output file, {@code -source_type} document source type,
 * {@code -bm25_k1} / {@code -bm25_b} BM25 parameters, and the flag {@code -bm25fixed}
 * to select {@code BM25SimilarityFix} instead of Lucene's {@code BM25Similarity}.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output directory");
    options.addOption("r", null, true, "optional output TREC-format QREL file");

    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    // The help text lists the available source types, comma-separated.
    options.addOption("source_type", null, true,
            "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    // If you increase this value, you may need to modify the following line in *.sh file
    // export MAVEN_OPTS="-Xms8192m -server"
    double ramBufferSizeMB = 1024 * 8; // 8 GB

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    // Declared outside the try block so that the finally block can close them.
    IndexWriter indexWriter = null;
    BufferedWriter qrelWriter = null;

    int docNum = 0;

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputDirName = null, qrelFileName = null;

        // NOTE(review): Usage(...) presumably prints the help text and exits; confirm,
        // because later code uses these values without null checks.
        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputDirName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'index directory'", options);
        }

        if (cmd.hasOption("r")) {
            qrelFileName = cmd.getOptionValue("r");
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        if (qrelFileName != null)
            qrelWriter = new BufferedWriter(new FileWriter(qrelFileName));

        // Make sure the output directory exists, is a directory, and is writable.
        File outputDir = new File(outputDirName);
        if (!outputDir.exists()) {
            if (!outputDir.mkdirs()) {
                System.out.println("couldn't create " + outputDir.getAbsolutePath());
                System.exit(1);
            }
        }
        if (!outputDir.isDirectory()) {
            System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
            System.exit(1);
        }
        if (!outputDir.canWrite()) {
            System.out.println("Can't write to " + outputDir.getAbsolutePath());
            System.exit(1);
        }

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        // BM25 parameters start from the project defaults and may be overridden below.
        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName));
        IndexWriterConfig indexConf = new IndexWriterConfig(analyzer);

        /*
            OpenMode.CREATE creates a new index or overwrites an existing one.
            https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE
        */
        indexConf.setOpenMode(OpenMode.CREATE);
        indexConf.setRAMBufferSizeMB(ramBufferSizeMB);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b));
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b));
        }

        indexWriter = new IndexWriter(indexDir, indexConf);

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        // The cleaner is constructed with a null argument here (no stop-word dictionary is supplied).
        TextCleaner textCleaner = new TextCleaner(null);

        // One Lucene document per source entry: an ID field plus the cleaned, space-joined text.
        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            Document luceneDoc = new Document();
            ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText);
            String cleanText = spaceJoin.join(cleanedToks);

            //        System.out.println(inpDoc.mDocId);
            //        System.out.println(cleanText);
            //        System.out.println("==============================");

            luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES));
            luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES));
            indexWriter.addDocument(luceneDoc);

            // A QREL entry is written only when a relevance flag is present and a QREL file was requested;
            // relevant entries get MAX_GRADE, non-relevant ones get 0.
            if (inpDoc.mIsRel != null && qrelWriter != null) {
                saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0);
            }
            if (docNum % 1000 == 0)
                System.out.println(String.format("Indexed %d documents", docNum));

        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Indexed %d documents", docNum));

        // Close whichever writers were successfully opened.
        try {
            if (null != indexWriter)
                indexWriter.close();
            if (null != qrelWriter)
                qrelWriter.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:com.mapr.PurchaseLog.java

/**
 * Generates a synthetic purchase log.
 *
 * <p>First, {@code opts.users} user records are sampled from {@code user-schema.txt}
 * and written tab-separated to a temporary file. Then, for each user, a session of
 * page hits is sampled from {@code hit_step.txt} and written to {@code opts.out}, one
 * tab-separated line per hit: timestamp, user record, hit fields, purchase marker.
 *
 * <p>Both writers are managed with try-with-resources so they are closed (and their
 * buffers flushed) even when sampling or writing fails part-way through.
 *
 * @param args command-line arguments, parsed into {@code Options}
 * @throws IOException if a schema resource cannot be read or a file cannot be written
 */
public static void main(String[] args) throws IOException {
    Options opts = new Options();
    CmdLineParser parser = new CmdLineParser(opts);
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println("Usage: -count <number>G|M|K [ -users number ]  log-file user-profiles");
        return;
    }

    Joiner withTab = Joiner.on("\t");

    // first generate lots of user definitions
    SchemaSampler users = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("user-schema.txt"), Charsets.UTF_8).read());
    File userFile = File.createTempFile("user", "tsv");
    try (BufferedWriter userOut = Files.newBufferedWriter(userFile.toPath(), Charsets.UTF_8)) {
        for (int i = 0; i < opts.users; i++) {
            userOut.write(withTab.join(users.sample()));
            userOut.newLine();
        }
    }

    // now generate a session for each user
    Splitter onTabs = Splitter.on("\t");
    Splitter onComma = Splitter.on(",");

    Random gen = new Random();
    SchemaSampler intermediate = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("hit_step.txt"), Charsets.UTF_8).read());

    // Fail early if the schemas lack the fields the purchase rule below depends on.
    final int COUNTRY = users.getFieldNames().indexOf("country");
    final int CAMPAIGN = intermediate.getFieldNames().indexOf("campaign_list");
    final int SEARCH_TERMS = intermediate.getFieldNames().indexOf("search_keywords");
    Preconditions.checkState(COUNTRY >= 0, "Need country field in user schema");
    Preconditions.checkState(CAMPAIGN >= 0, "Need campaign_list field in step schema");
    Preconditions.checkState(SEARCH_TERMS >= 0, "Need search_keywords field in step schema");

    try (BufferedWriter out = Files.newBufferedWriter(new File(opts.out).toPath(), Charsets.UTF_8)) {
        for (String line : Files.readAllLines(userFile.toPath(), Charsets.UTF_8)) {
            // session start: a uniformly random offset within 30 days, in milliseconds
            long t = (long) (TimeUnit.MILLISECONDS.convert(30, TimeUnit.DAYS) * gen.nextDouble());
            List<String> user = Lists.newArrayList(onTabs.split(line));

            // pick session length (floor of an exponential variate with mean 30)
            int n = (int) Math.floor(-30 * Math.log(gen.nextDouble()));

            for (int i = 0; i < n; i++) {
                // time on page (floor of an exponential variate with mean 20000 ms)
                int dt = (int) Math.floor(-20000 * Math.log(gen.nextDouble()));
                t += dt;

                // hit specific values
                JsonNode step = intermediate.sample();

                // check for purchase: base probability 0.01, raised to 0.5 for selected
                // country/campaign combinations or search keywords
                double p = 0.01;
                List<String> campaigns = Lists.newArrayList(onComma.split(step.get("campaign_list").asText()));
                List<String> keywords = Lists.newArrayList(onComma.split(step.get("search_keywords").asText()));
                if ((user.get(COUNTRY).equals("us") && campaigns.contains("5"))
                        || (user.get(COUNTRY).equals("jp") && campaigns.contains("7"))
                        || keywords.contains("homer") || keywords.contains("simpson")) {
                    p = 0.5;
                }

                String events = gen.nextDouble() < p ? "1" : "-";

                out.write(Long.toString(t));
                out.write("\t");
                out.write(line);
                out.write("\t");
                out.write(withTab.join(step));
                out.write("\t");
                out.write(events);
                out.write("\n");
            }
        }
    }
}

From source file:apps.LuceneQuery.java

/**
 * Command-line entry point: runs queries from a query source against a Lucene index
 * and writes the retrieved candidates to a TREC-style output file.
 *
 * <p>Options: {@code -d} index directory, {@code -i} input file, {@code -s} stop word
 * file, {@code -n} max number of results (default 100), {@code -o} TREC-style output
 * file, {@code -r} optional QREL file, {@code -prob} question sampling probability,
 * {@code -max_query_qty} maximum number of queries to run, {@code -bm25_k1} /
 * {@code -bm25_b} BM25 parameters, {@code -bm25fixed} flag, {@code -seed} random seed
 * (default 0), and {@code -source_type} query source type.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();

    options.addOption("d", null, true, "index directory");
    options.addOption("i", null, true, "input file");
    options.addOption("s", null, true, "stop word file");
    options.addOption("n", null, true, "max # of results");
    options.addOption("o", null, true, "a TREC-style output file");
    options.addOption("r", null, true, "an optional QREL file, if specified,"
            + "we save results only for queries for which we find at least one relevant entry.");

    options.addOption("prob", null, true, "question sampling probability");
    options.addOption("max_query_qty", null, true, "a maximum number of queries to run");
    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    options.addOption("seed", null, true, "random seed");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    // The help text lists the available query source types, comma-separated.
    options.addOption("source_type", null, true,
            "query source type: " + commaJoin.join(SourceFactory.getQuerySourceList()));

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    QrelReader qrels = null;

    try {

        CommandLine cmd = parser.parse(options, args);

        String indexDir = null;

        // NOTE(review): Usage(...) presumably prints the help text and exits; confirm,
        // because later code uses these values without null checks.
        if (cmd.hasOption("d")) {
            indexDir = cmd.getOptionValue("d");
        } else {
            Usage("Specify 'index directory'", options);
        }

        String inputFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        // Stop words are optional; without -s the cleaner receives null.
        DictNoComments stopWords = null;

        if (cmd.hasOption("s")) {
            String stopWordFileName = cmd.getOptionValue("s");
            stopWords = new DictNoComments(new File(stopWordFileName), true /* lowercasing */);
            System.out.println("Using the stopword file: " + stopWordFileName);
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        int numRet = 100;

        // NOTE(review): unlike the other numeric options, a malformed -n value is not
        // reported via Usage; the NumberFormatException reaches the generic handler below.
        if (cmd.hasOption("n")) {
            numRet = Integer.parseInt(cmd.getOptionValue("n"));
            System.out.println("Retrieving at most " + numRet + " candidate entries.");
        }

        String trecOutFileName = null;

        if (cmd.hasOption("o")) {
            trecOutFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'a TREC-style output file'", options);
        }

        double fProb = 1.0f;

        if (cmd.hasOption("prob")) {
            try {
                fProb = Double.parseDouble(cmd.getOptionValue("prob"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'question sampling probability'", options);
            }
        }

        if (fProb <= 0 || fProb > 1) {
            Usage("Question sampling probability should be >0 and <=1", options);
        }

        System.out.println("Sample the following fraction of questions: " + fProb);

        // BM25 parameters start from the project defaults and may be overridden below.
        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        long seed = 0;

        String tmpl = cmd.getOptionValue("seed");

        if (tmpl != null)
            seed = Long.parseLong(tmpl);

        System.out.println("Using seed: " + seed);

        // A seeded generator makes the question sampling below repeatable for a given seed.
        Random randGen = new Random(seed);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        Similarity similarity = null;

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25SimilarityFix(bm25_k1, bm25_b);
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25Similarity(bm25_k1, bm25_b);
        }

        int maxQueryQty = Integer.MAX_VALUE;

        if (cmd.hasOption("max_query_qty")) {
            try {
                maxQueryQty = Integer.parseInt(cmd.getOptionValue("max_query_qty"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'max_query_qty'", options);
            }
        }

        System.out.println(String.format("Executing at most %d queries", maxQueryQty));

        if (cmd.hasOption("r")) {
            String qrelFile = cmd.getOptionValue("r");
            System.out.println("Using the qrel file: '" + qrelFile
                    + "', queries not returning a relevant entry will be ignored.");
            qrels = new QrelReader(qrelFile);
        }

        System.out.println(String.format("Using indexing directory %s", indexDir));

        LuceneCandidateProvider candProvider = new LuceneCandidateProvider(indexDir, analyzer, similarity);
        TextCleaner textCleaner = new TextCleaner(stopWords);

        QuerySource inpQuerySource = SourceFactory.createQuerySource(sourceName, inputFileName);
        QueryEntry inpQuery = null;

        // NOTE(review): this writer is closed only on the normal path (after the loop);
        // the error paths call System.exit without closing it.
        BufferedWriter trecOutFile = new BufferedWriter(new FileWriter(new File(trecOutFileName)));

        // questNum counts every query read; questQty counts only the sampled (executed) ones.
        int questNum = 0, questQty = 0;

        long totalTimeMS = 0;

        while ((inpQuery = inpQuerySource.next()) != null) {
            if (questQty >= maxQueryQty)
                break;
            ++questNum;

            String queryID = inpQuery.mQueryId;

            // Execute this query with probability fProb.
            if (randGen.nextDouble() <= fProb) {
                ++questQty;

                String tokQuery = spaceJoin.join(textCleaner.cleanUp(inpQuery.mQueryText));
                String query = TextCleaner.luceneSafeCleanUp(tokQuery).trim();

                ResEntry[] results = null;

                // An empty cleaned query is not sent to the index; it yields zero results.
                if (query.isEmpty()) {
                    results = new ResEntry[0];
                    System.out.println(String.format("WARNING, empty query id = '%s'", inpQuery.mQueryId));
                } else {

                    try {
                        long start = System.currentTimeMillis();

                        results = candProvider.getCandidates(questNum, query, numRet);

                        long end = System.currentTimeMillis();
                        long searchTimeMS = end - start;
                        totalTimeMS += searchTimeMS;

                        System.out.println(String.format(
                                "Obtained results for the query # %d (answered %d queries), queryID %s the search took %d ms, we asked for max %d entries got %d",
                                questNum, questQty, queryID, searchTimeMS, numRet, results.length));

                    } catch (ParseException e) {
                        e.printStackTrace();
                        System.err.println(
                                "Error parsing query: " + query + " orig question is :" + inpQuery.mQueryText);
                        System.exit(1);
                    }
                }

                boolean bSave = true;

                // With a QREL file, results are saved only if at least one retrieved
                // entry carries a label accepted by isRelevLabel(label, 1).
                if (qrels != null) {
                    boolean bOk = false;
                    for (ResEntry r : results) {
                        String label = qrels.get(queryID, r.mDocId);
                        if (candProvider.isRelevLabel(label, 1)) {
                            bOk = true;
                            break;
                        }
                    }
                    if (!bOk)
                        bSave = false;
                }

                //            System.out.println(String.format("Ranking results the query # %d queryId='%s' save results? %b", 
                //                                              questNum, queryID, bSave));          
                if (bSave) {
                    saveTrecResults(queryID, results, trecOutFile, TREC_RUN, numRet);
                }
            }

            if (questNum % 1000 == 0)
                System.out.println(String.format("Proccessed %d questions", questNum));

        }

        // NOTE(review): when no query was executed (questQty == 0) the average below is
        // a float division by zero and prints NaN.
        System.out.println(String.format("Proccessed %d questions, the search took %f MS on average", questQty,
                (float) totalTimeMS / questQty));

        trecOutFile.close();

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments: " + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}

From source file:io.druid.server.sql.SQLRunner.java

/**
 * Command-line entry point: parses a SQL statement with the Druid SQL grammar,
 * converts it into a timeseries or group-by query, posts the query as JSON to a
 * Druid endpoint, and prints the result rows tab-separated on standard output.
 *
 * <p>Options: {@code -h/--help} prints help and exits with status 2, {@code -v}
 * prints the generated JSON query to standard error, {@code -e/--host} sets the
 * endpoint as {@code hostname:port} (default {@code localhost:8080}). The first
 * positional argument, if present, is the SQL text; otherwise {@code STATEMENT} is used.
 *
 * @param args command-line arguments
 * @throws Exception if argument parsing, the HTTP exchange, or JSON handling fails
 */
public static void main(String[] args) throws Exception {

    Options options = new Options();
    options.addOption("h", "help", false, "help");
    options.addOption("v", false, "verbose");
    options.addOption("e", "host", true, "endpoint [hostname:port]");

    CommandLine cmd = new GnuParser().parse(options, args);

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SQLRunner", options);
        System.exit(2);
    }

    String hostname = cmd.getOptionValue("e", "localhost:8080");
    String sql = cmd.getArgs().length > 0 ? cmd.getArgs()[0] : STATEMENT;

    ObjectMapper objectMapper = new DefaultObjectMapper();
    ObjectWriter jsonWriter = objectMapper.writerWithDefaultPrettyPrinter();

    CharStream stream = new ANTLRInputStream(sql);
    DruidSQLLexer lexer = new DruidSQLLexer(stream);
    TokenStream tokenStream = new CommonTokenStream(lexer);
    DruidSQLParser parser = new DruidSQLParser(tokenStream);

    // Replace whatever listeners are installed with the console listener only.
    lexer.removeErrorListeners();
    parser.removeErrorListeners();

    lexer.addErrorListener(ConsoleErrorListener.INSTANCE);
    parser.addErrorListener(ConsoleErrorListener.INSTANCE);

    try {
        // Parsing populates the parser's public fields (aggregators, filter, ...) used below;
        // the returned parse context itself is not needed.
        parser.query();
        if (parser.getNumberOfSyntaxErrors() > 0)
            throw new IllegalStateException();
        //      parser.setBuildParseTree(true);
        //      System.err.println(q.toStringTree(parser));
    } catch (Exception e) {
        // Exceptions without a message (such as the IllegalStateException above) exit silently.
        String msg = e.getMessage();
        if (msg != null)
            System.err.println(e);
        System.exit(1);
    }

    final Query query;
    final TypeReference typeRef;
    boolean groupBy = false;
    // No GROUP BY dimensions -> timeseries query; otherwise -> group-by query.
    if (parser.groupByDimensions.isEmpty()) {
        query = Druids.newTimeseriesQueryBuilder().dataSource(parser.getDataSource())
                .aggregators(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
                .postAggregators(parser.postAggregators).intervals(parser.intervals)
                .granularity(parser.granularity).filters(parser.filter).build();

        typeRef = new TypeReference<List<Result<TimeseriesResultValue>>>() {
        };
    } else {
        query = GroupByQuery.builder().setDataSource(parser.getDataSource())
                .setAggregatorSpecs(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
                .setPostAggregatorSpecs(parser.postAggregators).setInterval(parser.intervals)
                .setGranularity(parser.granularity).setDimFilter(parser.filter)
                .setDimensions(new ArrayList<DimensionSpec>(parser.groupByDimensions.values())).build();

        typeRef = new TypeReference<List<Row>>() {
        };
        groupBy = true;
    }

    String queryStr = jsonWriter.writeValueAsString(query);
    if (cmd.hasOption("v"))
        System.err.println(queryStr);

    URL url = new URL(String.format("http://%s/druid/v2/?pretty", hostname));
    final URLConnection urlConnection = url.openConnection();
    // doOutput defaults to false, and getOutputStream() is rejected unless it is enabled,
    // so it must be switched on before the request body is written.
    urlConnection.setDoOutput(true);
    urlConnection.addRequestProperty("content-type", MediaType.APPLICATION_JSON);
    urlConnection.getOutputStream().write(StringUtils.toUtf8(queryStr));
    BufferedReader stdInput = new BufferedReader(
            new InputStreamReader(urlConnection.getInputStream(), Charsets.UTF_8));

    // The reader is closed in the finally block so a failure while decoding or printing
    // the response does not leak the connection's input stream.
    try {
        Object res = objectMapper.readValue(stdInput, typeRef);

        Joiner tabJoiner = Joiner.on("\t");

        if (groupBy) {
            List<Row> rows = (List<Row>) res;
            Iterable<String> dimensions = Iterables.transform(parser.groupByDimensions.values(),
                    new Function<DimensionSpec, String>() {
                        @Override
                        public String apply(@Nullable DimensionSpec input) {
                            return input.getOutputName();
                        }
                    });

            // Header row: timestamp, dimension names, then metric field names.
            System.out.println(
                    tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), dimensions, parser.fields)));
            for (final Row r : rows) {
                System.out.println(tabJoiner.join(Iterables.concat(
                        Lists.newArrayList(parser.granularity.toDateTime(r.getTimestampFromEpoch())),
                        Iterables.transform(parser.groupByDimensions.values(),
                                new Function<DimensionSpec, String>() {
                                    @Override
                                    public String apply(@Nullable DimensionSpec input) {
                                        // The values of one dimension are joined with commas.
                                        return Joiner.on(",").join(r.getDimension(input.getOutputName()));
                                    }
                                }),
                        Iterables.transform(parser.fields, new Function<String, Object>() {
                            @Override
                            public Object apply(@Nullable String input) {
                                return r.getFloatMetric(input);
                            }
                        }))));
            }
        } else {
            List<Result<TimeseriesResultValue>> rows = (List<Result<TimeseriesResultValue>>) res;
            // Header row: timestamp, then metric field names.
            System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), parser.fields)));
            for (final Result<TimeseriesResultValue> r : rows) {
                System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList(r.getTimestamp()),
                        Lists.transform(parser.fields, new Function<String, Object>() {
                            @Override
                            public Object apply(@Nullable String input) {
                                return r.getValue().getMetric(input);
                            }
                        }))));
            }
        }
    } finally {
        CloseQuietly.close(stdInput);
    }
}

From source file:org.opensha.nshmp2.erf.source.PointSource13b.java

/**
 * Ad-hoc driver: prints {@code NSHMP_Util.getMeanRJB(6.05, 1.0)}, then builds a single
 * point source and prints one space-separated line of attributes per rupture,
 * followed by the surface's {@code footwall} value.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {

    System.out.println(NSHMP_Util.getMeanRJB(6.05, 1.0));

    final double mw = 7.45;
    final SingleMagFreqDist magFreqDist = new SingleMagFreqDist(mw, 1, 0.1, mw, 1);
    final Location sourceLocation = new Location(31.6, -117.1);
    final Location siteLocation = new Location(31.6, -117.105);
    final double[] depthValues = { 5.0, 1.0 };

    // Only the NORMAL mechanism carries non-zero weight.
    final Map<FocalMech, Double> mechWeights = Maps.newHashMap();
    mechWeights.put(FocalMech.STRIKE_SLIP, 0.0);
    mechWeights.put(FocalMech.REVERSE, 0.0);
    mechWeights.put(FocalMech.NORMAL, 1.0);

    final PointSource13b pointSource =
            new PointSource13b(sourceLocation, magFreqDist, 1.0, depthValues, mechWeights);
    final Joiner spaceJoiner = Joiner.on(" ");

    for (ProbEqkRupture rupture : pointSource) {
        final PointSurface13b surface = (PointSurface13b) rupture.getRuptureSurface();

        // Rupture properties first, then surface geometry, then distances to the site.
        final List<Double> rowValues = Lists.newArrayList(
                rupture.getMag(),
                rupture.getAveRake(),
                surface.getAveDip(),
                surface.zTop,
                surface.zBot,
                surface.widthH,
                surface.widthDD,
                surface.getDistanceJB(siteLocation),
                surface.getDistanceRup(siteLocation),
                surface.getDistanceX(siteLocation));

        final String joinedRow = spaceJoiner.join(rowValues);
        System.out.println(joinedRow + " " + surface.footwall);
    }

}

From source file:apps.QueryReaderFactory.java

/**
 * Converts a query file into XML index entries, written one per line to the output file.
 *
 * <p>Queries are read through {@code createReader(argv[0], argv[1])}. Each query text is
 * tokenized by a {@code TextCleaner}, the tokens are joined with single spaces, and the result
 * is emitted together with the query id via {@code XmlHelper.genXMLIndexEntry}. A failure while
 * writing a single entry is reported and the loop moves on to the next query; any other failure
 * terminates the program with exit status 1.
 *
 * @param argv exactly three arguments: input file, input type, output file
 */
public static void main(String[] argv) {
    if (argv.length != 3) {
        System.err.println("Usage: <input file> <input type> <output file>");
        // Stop here: falling through would index argv[0..2] below and die with an
        // ArrayIndexOutOfBoundsException stack trace instead of just the usage message.
        System.exit(1);
    }

    BufferedWriter outputFile = null;

    try {
        QueryReader qr = createReader(argv[0], argv[1]);

        outputFile = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(argv[2])));

        XmlHelper xmlHlp = new XmlHelper();

        TextCleaner textCleaner = new TextCleaner(
                new DictNoComments(new File("data/stopwords.txt"), true /* lower case */),
                Source2XML.USE_STANFORD, Source2XML.USE_LEMMATIZER);

        Joiner spaceJoin = Joiner.on(' ');

        // Instantiated only so that its class name can be reported below.
        //Stemmer stemmer = new Stemmer();
        KrovetzStemmer stemmer = new KrovetzStemmer();

        System.out.println("Using Stanford NLP?        " + Source2XML.USE_STANFORD);
        System.out.println("Using Stanford lemmatizer? " + Source2XML.USE_LEMMATIZER);
        System.out.println("Using stemmer?             " + Source2XML.USE_STEMMER
                + (Source2XML.USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : ""));

        for (int qid = 0; qid < qr.getQueryQty(); ++qid) {
            Map<String, String> outputMap = new HashMap<String, String>();

            ArrayList<String> toks = textCleaner.cleanUp(qr.getQuery(qid));

            outputMap.put(UtilConst.XML_FIELD_DOCNO, qr.getQueryId(qid));
            outputMap.put(UtilConst.XML_FIELD_TEXT, spaceJoin.join(toks));

            String xml = xmlHlp.genXMLIndexEntry(outputMap);

            // Best effort per entry: a failed write is reported, then the next query is tried.
            try {
                outputFile.write(xml);
                outputFile.write(NL);
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("Error processing/saving a document!");
            }
        }
    } catch (Exception e) {
        // Any setup or processing failure outside the per-entry write is fatal.
        e.printStackTrace();
        System.err.println("Terminated due to exception: " + e);
        System.exit(1);
    } finally {
        try {
            if (null != outputFile)
                outputFile.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:umd.twittertools.download.DataForHua.java

/**
 * Runs every topic of a TREC topics file against a search service and prints, for each distinct
 * returned tweet that has an entry in the qrels file, one line of the form
 * {@code qid@tweetId@label@rsv@queryTokens@tweetTokens} to standard output (UTF-8).
 *
 * <p>The host, port, queries and qrels options are required; when any is missing the help text
 * is printed and the program exits with status -1. The number of results defaults to 10000 and
 * the run tag to {@code DEFAULT_RUNTAG}. A non-numeric port or result count is reported on
 * standard error and also ends the program with status -1.
 *
 * @param args command-line arguments, parsed with Commons CLI
 * @throws Exception if loading the topics or qrels, searching, or tokenizing fails
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("string").hasArg().withDescription("host").create(HOST_OPTION));
    options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("qrels file").create(QRELS_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("group id").create(GROUP_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("access token").create(TOKEN_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(HOST_OPTION) || !cmdline.hasOption(PORT_OPTION) || !cmdline.hasOption(QUERIES_OPTION)
            || !cmdline.hasOption(QRELS_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(DataForHua.class.getName(), options);
        System.exit(-1);
    }

    String queryFile = cmdline.getOptionValue(QUERIES_OPTION);
    if (!new File(queryFile).exists()) {
        System.err.println("Error: " + queryFile + " doesn't exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    TrecTopicSet topicsFile = TrecTopicSet.fromFile(new File(queryFile));

    int numResults = 10000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String group = cmdline.hasOption(GROUP_OPTION) ? cmdline.getOptionValue(GROUP_OPTION) : null;
    String token = cmdline.hasOption(TOKEN_OPTION) ? cmdline.getOptionValue(TOKEN_OPTION) : null;

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    // Validate the port the same way as the result count, so a typo yields a short message
    // rather than an uncaught NumberFormatException.
    int port = 0;
    try {
        port = Integer.parseInt(cmdline.getOptionValue(PORT_OPTION));
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + PORT_OPTION + ": " + cmdline.getOptionValue(PORT_OPTION));
        System.exit(-1);
    }

    TrecSearchThriftClient client = new TrecSearchThriftClient(cmdline.getOptionValue(HOST_OPTION), port, group,
            token);

    TweetAnalyzer tokenizer = new TweetAnalyzer(Version.LUCENE_43, false); // no stemming
    Joiner joiner = Joiner.on(' ');
    String qrelsFile = cmdline.getOptionValue(QRELS_OPTION);
    Table<Integer, Long, Integer> groundTruth = RunTemporalModel.loadGroundTruth(qrelsFile);

    for (cc.twittertools.search.TrecTopic query : topicsFile) {
        List<TResult> results = client.search(query.getQuery(), query.getQueryTweetTime(), numResults);

        // The TREC official qrels don't have the "MB" prefix and leading zeros, so we perform
        // this transformation so that trec_eval doesn't complain. The id is the same for every
        // result of a topic, so it is parsed once per topic.
        Integer qid = Integer.parseInt(query.getId().replaceFirst("^MB0*", ""));

        // Tokenized query text; built on first use and then shared by all results of the topic.
        String qtext = null;

        Set<Long> tweetIds = new HashSet<Long>();
        for (TResult result : results) {
            // Set.add returns false for an id that was already seen: skip duplicate tweets.
            if (!tweetIds.add(result.id)) {
                continue;
            }
            // Only tweets that were judged for this topic are emitted.
            if (!groundTruth.contains(qid, result.id)) {
                continue;
            }

            if (qtext == null) {
                qtext = joiner.join(LuceneTokenizer
                        .tokenize(tokenizer.tokenStream("text", new StringReader(query.getQuery()))));
            }
            String tweetText = joiner.join(LuceneTokenizer
                    .tokenize(tokenizer.tokenStream("text", new StringReader(result.text))));
            int label = groundTruth.get(qid, result.id);
            out.println(String.format("%d@%d@%d@%f@%s@%s", qid, result.id, label, result.rsv, qtext,
                    tweetText));
        }
    }
    out.close();
}

From source file:edu.cmu.lti.oaqa.bio.index.medline.annotated.query.SimpleQueryApp.java

/**
 * Interactive console for querying a Solr index.
 *
 * <p>Takes the Solr URI from option {@code -u} and an optional result limit from {@code -n}
 * (10 when absent). It then repeatedly prompts on standard output, reads one query line from
 * standard input, prints the query produced by {@code QueryTransformer}, runs it, and prints
 * each hit with its parsed entity annotations. The loop ends when standard input is exhausted.
 *
 * @param args command-line arguments, parsed with Commons CLI
 */
public static void main(String[] args) {
    Options cliOptions = new Options();
    cliOptions.addOption("u", null, true, "Solr URI");
    cliOptions.addOption("n", null, true, "Max # of results");

    CommandLineParser cliParser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cli = cliParser.parse(cliOptions, args);

        String solrUri = cli.getOptionValue("u");
        if (null == solrUri) {
            Usage("Specify Solr URI");
        }

        SolrServerWrapper solrServer = new SolrServerWrapper(solrUri);

        // Result limit: 10 unless overridden by -n.
        int maxResults = cli.hasOption("n") ? Integer.parseInt(cli.getOptionValue("n")) : 10;

        // Fields requested from Solr for every hit.
        List<String> requestedFields = new ArrayList<String>();
        requestedFields.add(UtilConstMedline.ID_FIELD);
        requestedFields.add(UtilConstMedline.SCORE_FIELD);
        requestedFields.add(UtilConstMedline.ARTICLE_TITLE_FIELD);
        requestedFields.add(UtilConstMedline.ENTITIES_DESC_FIELD);
        requestedFields.add(UtilConstMedline.ABSTRACT_TEXT_FIELD);

        BufferedReader stdinReader = new BufferedReader(new InputStreamReader(System.in));
        Joiner idJoiner = Joiner.on(',');

        while (true) {
            System.out.println("Input query: ");
            String userQuery = stdinReader.readLine();
            if (userQuery == null) {
                // End of input: leave the prompt loop.
                break;
            }

            String translated = new QueryTransformer(userQuery).getQuery();

            System.out.println("Translated query:");
            System.out.println(translated);
            System.out.println("=========================");

            SolrDocumentList hits = solrServer.runQuery(translated, requestedFields, maxResults);

            System.out.println("Found " + hits.getNumFound() + " entries");

            for (SolrDocument hit : hits) {
                String pmid = (String) hit.getFieldValue(UtilConstMedline.ID_FIELD);
                float hitScore = (Float) hit.getFieldValue(UtilConstMedline.SCORE_FIELD);
                // The title is read (and cast) but is not part of the printed line.
                String articleTitle = (String) hit.getFieldValue(UtilConstMedline.ARTICLE_TITLE_FIELD);
                String abstractText = (String) hit.getFieldValue(UtilConstMedline.ABSTRACT_TEXT_FIELD);

                System.out.println(hitScore + " PMID=" + pmid + " " + abstractText);

                String entitiesDesc = (String) hit.getFieldValue(UtilConstMedline.ENTITIES_DESC_FIELD);
                System.out.println("Entities:");
                for (EntityEntry entity : EntityEntry.parseEntityDesc(entitiesDesc)) {
                    String conceptIds = idJoiner.join(entity.mConceptIds);
                    String entityLine = String.format("[%d %d] concept=%s concept_ids=%s", entity.mStart,
                            entity.mEnd, entity.mConcept, conceptIds);
                    System.out.println(entityLine);
                }
            }
        }

        solrServer.close();

    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}