Example usage for org.apache.lucene.search IndexSearcher setSimilarity

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher setSimilarity.

Prototype

public void setSimilarity(Similarity similarity)

Source Link

Document

Expert: Set the Similarity implementation used by this IndexSearcher.

Usage

From source file:IrqaQuery.java

License:Apache License

public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else//from  w ww  .ja  va  2s .  c o m
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    TopDocs results = searcher.search(query, numResult);
    ScoreDoc[] hits = results.scoreDocs;
    List<Document> docs = new ArrayList<Document>();

    int numTotalHits = results.totalHits;
    //        System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, numResult);

    String searchResult = "";
    //        System.out.println("Only results 1 - " + hits.length);

    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }

    return docs;
}

From source file:luceneInterface.java

License:Apache License

public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else/*from w ww.  j  av a 2  s. com*/
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    BooleanQuery.Builder bqb = new BooleanQuery.Builder();
    bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD);
    bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD);

    //        Term term = new Term(field, question);
    //        Query query = new TermQuery(term);

    //        TopDocs results = searcher.search(query, numResult);
    TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult);

    ScoreDoc[] hits = results.scoreDocs;
    List<Document> docs = new ArrayList<Document>();

    int numTotalHits = results.totalHits;
    //        System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, numResult);

    String searchResult = "";
    //        System.out.println("Only results 1 - " + hits.length);

    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }

    return docs;
}

From source file:aos.lucene.search.ext.payloads.PayloadsTest.java

License:Apache License

public void testPayloadTermQuery() throws Throwable {
    addDoc("Hurricane warning",
            "Bulletin: A hurricane warning was issued at " + "6 AM for the outer great banks");
    addDoc("Warning label maker", "The warning label maker is a delightful toy for "
            + "your precocious seven year old's warning needs");
    addDoc("Tornado warning",
            "Bulletin: There is a tornado warning for " + "Worcester county until 6 PM today");

    IndexReader r = writer.getReader();//  ww w .ja  v  a 2s. c om
    writer.close();

    IndexSearcher searcher = new IndexSearcher(r);

    searcher.setSimilarity(new BoostingSimilarity());

    Term warning = new Term("contents", "warning");

    Query query1 = new TermQuery(warning);
    LOGGER.info("\nTermQuery results:");
    TopDocs hits = searcher.search(query1, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #B
            searcher.doc(hits.scoreDocs[0].doc).get("title")); // #B

    Query query2 = new PayloadTermQuery(warning, new AveragePayloadFunction());
    LOGGER.info("\nPayloadTermQuery results:");
    hits = searcher.search(query2, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #C
            searcher.doc(hits.scoreDocs[2].doc).get("title")); // #C
    r.close();
    searcher.close();
}

From source file:aos.lucene.search.msc.ScoreTest.java

License:Apache License

public void testSimple() throws Exception {
    indexSingleFieldDocs(new Field[] { new Field("contents", "x", Field.Store.YES, Field.Index.ANALYZED) });
    IndexSearcher searcher = new IndexSearcher(directory);
    searcher.setSimilarity(new SimpleSimilarity());

    Query query = new TermQuery(new Term("contents", "x"));
    Explanation explanation = searcher.explain(query, 0);
    LOGGER.info(explanation);//  w  ww . jav a  2s  .  co  m

    TopDocs matches = searcher.search(query, 10);
    assertEquals(1, matches.totalHits);

    assertEquals(1F, matches.scoreDocs[0].score, 0.0);

    searcher.close();
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    // create results from mcs:
    LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
    for (int j = 0; j < ir.numDocs(); j++) {
        Graph model = new Graph(ir.document(j).getValues("graph")[0]);
        float mcsSimilarity = query.getMcsSimilarity(model);
        resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity));
    }/* w w w  .  j  a  va2  s  . co m*/
    Collections.sort(resultsMcs);
    //            for (Iterator<ResultHolder> iterator = resultsMcs.iterator(); iterator.hasNext();) {
    //                ResultHolder r = iterator.next();
    //                System.out.println(r.getDocumentNumber() + ": " + r.getSimilarity());
    //            }

    // create results from search:

    // set to another similarity if necessary:
    is.setSimilarity(new TermFrequencySimilarity());
    //        is.setSimilarity(new SimpleTfIdfSimilarity());

    LucenePathIndexRetrievalEngine engine = new LucenePathIndexRetrievalEngine(50);
    String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
    //        System.out.println(query);
    QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
    Query q = qParse.parse(gQuery);
    Hits hits = is.search(q);
    LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
    for (int i = 0; i < hits.length(); i++) {
        String graph = hits.doc(i).getValues("graph")[0];
        int docID = -1;
        for (int j = 0; j < ir.numDocs(); j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            if (model.toString().equals(graph))
                docID = j;
        }
        resultsSearch.add(new ResultHolder(docID, graph, hits.score(i)));
    }
    Collections.sort(resultsSearch);
    printPrecisionRecallPlot(resultsMcs, resultsSearch);
}

From source file:br.usp.icmc.gazetteer.SemanticSearchTest.LuceneSearcher.java

License:Open Source License

public void search1(String q) throws IOException, ParseException {
    IndexReader ireader = IndexReader.open(dir); // read-only=true
    IndexSearcher reader = new IndexSearcher(ireader);
    reader.setSimilarity(similarityltc()); //ddd.qqq
    busca(reader, q);/*  w  w w  .j a  v a  2  s.c  o m*/
    numQ++;
}

From source file:cc.twittertools.search.api.TrecSearchThriftServer.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION));
    options.addOption(/*  ww w. j  ava 2s  .c  o  m*/
            OptionBuilder.withArgName("index").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("max number of threads in thread pool").create(MAX_THREADS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing access tokens").create(CREDENTIALS_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(TrecSearchThriftServer.class.getName(), options);
        System.exit(-1);
    }

    int port = cmdline.hasOption(PORT_OPTION) ? Integer.parseInt(cmdline.getOptionValue(PORT_OPTION))
            : DEFAULT_PORT;
    int maxThreads = cmdline.hasOption(MAX_THREADS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(MAX_THREADS_OPTION))
            : DEFAULT_MAX_THREADS;
    File index = new File(cmdline.getOptionValue(INDEX_OPTION));

    Map<String, String> credentials = null;
    if (cmdline.hasOption(CREDENTIALS_OPTION)) {
        credentials = Maps.newHashMap();
        File cfile = new File(cmdline.getOptionValue(CREDENTIALS_OPTION));
        if (!cfile.exists()) {
            System.err.println("Error: " + cfile + " does not exist!");
            System.exit(-1);
        }
        for (String s : Files.readLines(cfile, Charsets.UTF_8)) {
            try {
                String[] arr = s.split(":");
                credentials.put(arr[0], arr[1]);
            } catch (Exception e) {
                // Catch any exceptions from parsing file contain access tokens
                System.err.println("Error reading access tokens from " + cfile + "!");
                System.exit(-1);
            }
        }
    }

    if (!index.exists()) {
        System.err.println("Error: " + index + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(MMapDirectory.open(index));
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new LMDirichletSimilarity(DEFAULT_MU));

    QueryLikelihoodModel qlModel = new QueryLikelihoodModel(DEFAULT_MU);

    TServerSocket serverSocket = new TServerSocket(port);
    TrecSearch.Processor<TrecSearch.Iface> searchProcessor = new TrecSearch.Processor<TrecSearch.Iface>(
            new TrecSearchHandler(searcher, qlModel, credentials));

    TThreadPoolServer.Args serverArgs = new TThreadPoolServer.Args(serverSocket);
    serverArgs.maxWorkerThreads(maxThreads);
    TServer thriftServer = new TThreadPoolServer(
            serverArgs.processor(searchProcessor).protocolFactory(new TBinaryProtocol.Factory()));

    thriftServer.serve();
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(/*from  www.j  a v a 2s  . c om*/
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

    TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        Query query = p.parse(topic.getQuery());
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(//from  w  w  w.j  a v  a 2s. c  o  m
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;
    long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION))
            : DEFAULT_MAX_ID;
    int numResults = cmdline.hasOption(NUM_RESULTS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION))
            : DEFAULT_NUM_RESULTS;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
        i++;
    }

    reader.close();
    out.close();
}

From source file:ci6226.eval_index_reader.java

public eval_index_reader(Analyzer _analyzer, String _dir, String[] _searchList, int _topn)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException, InvalidTokenOffsetsException {
    String indexdir = "./" + _dir;
    String field = "text";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexdir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter writer = new PrintWriter(_dir + ".csv", "UTF-8");

    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similarity_tf_rm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similiarty_queryNorm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    writer.close();//from  w  w w.  ja va2  s.  c o  m
    reader.close();
    /// searcher.setSimilarity(null);

}