Example usage for org.apache.lucene.search IndexSearcher setSimilarity

List of usage examples for org.apache.lucene.search IndexSearcher setSimilarity

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher setSimilarity.

Prototype

public void setSimilarity(Similarity similarity) 

Source Link

Document

Expert: Set the Similarity implementation used by this IndexSearcher.

Usage

From source file:IrqaQuery.java

License:Apache License

public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else//from  w ww  .ja  va  2s .  c o m
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    TopDocs results = searcher.search(query, numResult);
    ScoreDoc[] hits = results.scoreDocs;
    List<Document> docs = new ArrayList<Document>();

    int numTotalHits = results.totalHits;
    //        System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, numResult);

    String searchResult = "";
    //        System.out.println("Only results 1 - " + hits.length);

    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }

    return docs;
}

From source file:luceneInterface.java

License:Apache License

public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else/*from w ww.  j  av a 2  s. com*/
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    BooleanQuery.Builder bqb = new BooleanQuery.Builder();
    bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD);
    bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD);

    //        Term term = new Term(field, question);
    //        Query query = new TermQuery(term);

    //        TopDocs results = searcher.search(query, numResult);
    TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult);

    ScoreDoc[] hits = results.scoreDocs;
    List<Document> docs = new ArrayList<Document>();

    int numTotalHits = results.totalHits;
    //        System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, numResult);

    String searchResult = "";
    //        System.out.println("Only results 1 - " + hits.length);

    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }

    return docs;
}

From source file:aos.lucene.search.ext.payloads.PayloadsTest.java

License:Apache License

public void testPayloadTermQuery() throws Throwable {
    addDoc("Hurricane warning",
            "Bulletin: A hurricane warning was issued at " + "6 AM for the outer great banks");
    addDoc("Warning label maker", "The warning label maker is a delightful toy for "
            + "your precocious seven year old's warning needs");
    addDoc("Tornado warning",
            "Bulletin: There is a tornado warning for " + "Worcester county until 6 PM today");

    IndexReader r = writer.getReader();//  ww w .ja  v  a 2s. c om
    writer.close();

    IndexSearcher searcher = new IndexSearcher(r);

    searcher.setSimilarity(new BoostingSimilarity());

    Term warning = new Term("contents", "warning");

    Query query1 = new TermQuery(warning);
    LOGGER.info("\nTermQuery results:");
    TopDocs hits = searcher.search(query1, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #B
            searcher.doc(hits.scoreDocs[0].doc).get("title")); // #B

    Query query2 = new PayloadTermQuery(warning, new AveragePayloadFunction());
    LOGGER.info("\nPayloadTermQuery results:");
    hits = searcher.search(query2, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #C
            searcher.doc(hits.scoreDocs[2].doc).get("title")); // #C
    r.close();
    searcher.close();
}

From source file:aos.lucene.search.msc.ScoreTest.java

License:Apache License

public void testSimple() throws Exception {
    indexSingleFieldDocs(new Field[] { new Field("contents", "x", Field.Store.YES, Field.Index.ANALYZED) });
    IndexSearcher searcher = new IndexSearcher(directory);
    searcher.setSimilarity(new SimpleSimilarity());

    Query query = new TermQuery(new Term("contents", "x"));
    Explanation explanation = searcher.explain(query, 0);
    LOGGER.info(explanation);//  w  ww . jav a  2s  .  co  m

    TopDocs matches = searcher.search(query, 10);
    assertEquals(1, matches.totalHits);

    assertEquals(1F, matches.scoreDocs[0].score, 0.0);

    searcher.close();
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    // create results from mcs:
    LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
    for (int j = 0; j < ir.numDocs(); j++) {
        Graph model = new Graph(ir.document(j).getValues("graph")[0]);
        float mcsSimilarity = query.getMcsSimilarity(model);
        resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity));
    }/* w w w  .  j  a  va2  s  . co m*/
    Collections.sort(resultsMcs);
    //            for (Iterator<ResultHolder> iterator = resultsMcs.iterator(); iterator.hasNext();) {
    //                ResultHolder r = iterator.next();
    //                System.out.println(r.getDocumentNumber() + ": " + r.getSimilarity());
    //            }

    // create results from search:

    // set to another similarity if necessary:
    is.setSimilarity(new TermFrequencySimilarity());
    //        is.setSimilarity(new SimpleTfIdfSimilarity());

    LucenePathIndexRetrievalEngine engine = new LucenePathIndexRetrievalEngine(50);
    String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
    //        System.out.println(query);
    QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
    Query q = qParse.parse(gQuery);
    Hits hits = is.search(q);
    LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
    for (int i = 0; i < hits.length(); i++) {
        String graph = hits.doc(i).getValues("graph")[0];
        int docID = -1;
        for (int j = 0; j < ir.numDocs(); j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            if (model.toString().equals(graph))
                docID = j;
        }
        resultsSearch.add(new ResultHolder(docID, graph, hits.score(i)));
    }
    Collections.sort(resultsSearch);
    printPrecisionRecallPlot(resultsMcs, resultsSearch);
}

From source file:br.usp.icmc.gazetteer.SemanticSearchTest.LuceneSearcher.java

License:Open Source License

public void search1(String q) throws IOException, ParseException {
    IndexReader ireader = IndexReader.open(dir); // read-only=true
    IndexSearcher reader = new IndexSearcher(ireader);
    reader.setSimilarity(similarityltc()); //ddd.qqq
    busca(reader, q);/*  w  w w  .j a  v a  2  s.c  o m*/
    numQ++;
}

From source file:cc.twittertools.search.api.TrecSearchThriftServer.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION));
    options.addOption(/*  ww w. j  ava 2s  .c  o  m*/
            OptionBuilder.withArgName("index").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("max number of threads in thread pool").create(MAX_THREADS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing access tokens").create(CREDENTIALS_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(TrecSearchThriftServer.class.getName(), options);
        System.exit(-1);
    }

    int port = cmdline.hasOption(PORT_OPTION) ? Integer.parseInt(cmdline.getOptionValue(PORT_OPTION))
            : DEFAULT_PORT;
    int maxThreads = cmdline.hasOption(MAX_THREADS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(MAX_THREADS_OPTION))
            : DEFAULT_MAX_THREADS;
    File index = new File(cmdline.getOptionValue(INDEX_OPTION));

    Map<String, String> credentials = null;
    if (cmdline.hasOption(CREDENTIALS_OPTION)) {
        credentials = Maps.newHashMap();
        File cfile = new File(cmdline.getOptionValue(CREDENTIALS_OPTION));
        if (!cfile.exists()) {
            System.err.println("Error: " + cfile + " does not exist!");
            System.exit(-1);
        }
        for (String s : Files.readLines(cfile, Charsets.UTF_8)) {
            try {
                String[] arr = s.split(":");
                credentials.put(arr[0], arr[1]);
            } catch (Exception e) {
                // Catch any exceptions from parsing file contain access tokens
                System.err.println("Error reading access tokens from " + cfile + "!");
                System.exit(-1);
            }
        }
    }

    if (!index.exists()) {
        System.err.println("Error: " + index + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(MMapDirectory.open(index));
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new LMDirichletSimilarity(DEFAULT_MU));

    QueryLikelihoodModel qlModel = new QueryLikelihoodModel(DEFAULT_MU);

    TServerSocket serverSocket = new TServerSocket(port);
    TrecSearch.Processor<TrecSearch.Iface> searchProcessor = new TrecSearch.Processor<TrecSearch.Iface>(
            new TrecSearchHandler(searcher, qlModel, credentials));

    TThreadPoolServer.Args serverArgs = new TThreadPoolServer.Args(serverSocket);
    serverArgs.maxWorkerThreads(maxThreads);
    TServer thriftServer = new TThreadPoolServer(
            serverArgs.processor(searchProcessor).protocolFactory(new TBinaryProtocol.Factory()));

    thriftServer.serve();
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(/*from  www.j  a v a 2s  . c om*/
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

    TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        Query query = p.parse(topic.getQuery());
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(//from  w  w  w.j  a v  a 2s. c  o  m
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;
    long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION))
            : DEFAULT_MAX_ID;
    int numResults = cmdline.hasOption(NUM_RESULTS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION))
            : DEFAULT_NUM_RESULTS;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
        i++;
    }

    reader.close();
    out.close();
}

From source file:ci6226.eval_index_reader.java

public eval_index_reader(Analyzer _analyzer, String _dir, String[] _searchList, int _topn)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException, InvalidTokenOffsetsException {
    String indexdir = "./" + _dir;
    String field = "text";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexdir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter writer = new PrintWriter(_dir + ".csv", "UTF-8");

    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similarity_tf_rm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similiarty_queryNorm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    writer.close();//from  w  w w.  ja va2  s.  c o  m
    reader.close();
    /// searcher.setSimilarity(null);

}