Example usage for org.apache.lucene.search IndexSearcher search

Introduction

This page collects usage examples for the org.apache.lucene.search IndexSearcher.search method.

Prototype

public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
        throws IOException 

Document

Lower-level search API: searches all of the index's leaves using the given CollectorManager and returns the manager's reduced result.
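
A minimal sketch of how this overload is called (assuming Lucene 9.x, where TotalHitCountCollectorManager is available; the examples below use older Collector- and TopDocs-based overloads). The searcher runs the query over every index segment and the manager reduces the per-segment collectors into a single result:

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TotalHitCountCollectorManager;
import org.apache.lucene.store.Directory;

public static int countAllDocs(Directory dir) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // The CollectorManager overload can parallelize collection across
        // segments (when the searcher is built with an Executor) and then
        // reduces the per-segment counts into one Integer.
        return searcher.search(new MatchAllDocsQuery(),
                new TotalHitCountCollectorManager());
    }
}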

Usage

From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java

License:Apache License

/** User runs a query and counts facets. */
private List<FacetResult> search() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    // Count both "Publish Date" and "Author" dimensions
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Publish Date"), 10),
            new CountFacetRequest(new CategoryPath("Author"), 10));

    // Aggregates the facet counts
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), fc);

    // Retrieve results
    List<FacetResult> facetResults = fc.getFacetResults();

    indexReader.close();
    taxoReader.close();

    return facetResults;
}
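
As the comment in this snippet notes, a real application would usually combine hit collection with facet counting. A hedged sketch against the same Lucene 4.x facet API, reusing the searcher and the FacetsCollector fc from above; userQuery is a hypothetical stand-in for the user's actual query:

    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, true);
    searcher.search(userQuery, MultiCollector.wrap(topDocs, fc));
    TopDocs hits = topDocs.topDocs();                       // the "normal" hits
    List<FacetResult> facetResults = fc.getFacetResults();  // the facet counts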

From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java

License:Apache License

/** User drills down on 'Publish Date/2010'. */
private List<FacetResult> drillDown() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    // Now user drills down on Publish Date/2010:
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));

    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(fsp.indexingParams);
    q.add(new CategoryPath("Publish Date/2010", '/'));
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(q, fc);

    // Retrieve results
    List<FacetResult> facetResults = fc.getFacetResults();

    indexReader.close();
    taxoReader.close();

    return facetResults;
}
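
FacetSearchParams, CategoryPath, and FacetsCollector.create belong to the pre-4.7 facet API. For comparison, a rough sketch of the same drill-down on Lucene 4.7 or later, where FacetsConfig replaces FacetSearchParams:

    FacetsConfig config = new FacetsConfig();
    DrillDownQuery q = new DrillDownQuery(config);  // no base query: "browse only"
    q.add("Publish Date", "2010");
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, q, 10, fc);
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    FacetResult authorCounts = facets.getTopChildren(10, "Author");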

From source file:com.m3958.apps.pcms.lucene.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * 
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed a second time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) {// output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
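
Re-executing the query with numTotalHits, as doPagingSearch does, re-collects every hit just to move one page forward. For deep paging, IndexSearcher.searchAfter continues collection after the last hit of the previous page; a minimal sketch using the variables from the method above:

    // Cursor-style paging: collect only the next hitsPerPage hits after the
    // last ScoreDoc of the page that was just displayed.
    ScoreDoc lastHit = hits[end - 1];
    TopDocs nextPage = searcher.searchAfter(lastHit, query, hitsPerPage);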

From source file:com.main.Searcher.java

public List<Bean> searching(String s1, String s2, String radioBtn)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    //Get a reference to the index directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));

    //IndexReader: an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);

    IndexSearcher searcher = new IndexSearcher(reader);
    //Analyzer with the default stop-word set
    Analyzer analyzer = new StandardAnalyzer();

    String contents = "contents";

    QueryParser parser = new QueryParser(contents, analyzer);

    Query q1 = parser.parse(s1);
    Query q2 = parser.parse(s2);

    //Conjunction, disjunction, and negation
    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    //Occur.MUST: both queries must match the document
    if (radioBtn.equals("conjunction")) {
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST);
    } //Occur.SHOULD: at least one of the queries should match
    else if (radioBtn.equals("disjunction")) {
        bq.add(q1, BooleanClause.Occur.SHOULD);
        bq.add(q2, BooleanClause.Occur.SHOULD);
    } //Negation: the first query must match, the second must not
    else {
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST_NOT);
    }

    //Build the boolean query once and reuse it for searching and scoring
    BooleanQuery booleanQuery = bq.build();
    TopDocs hits = searcher.search(booleanQuery, 10);

    //Uses HTML <B></B> tags to highlight the matched terms
    Formatter formatter = new SimpleHTMLFormatter();
    //Scores content fragments by the number of unique query terms they contain
    QueryScorer scorer = new QueryScorer(booleanQuery);
    //Marks up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    //Breaks the content into same-size fragments without splitting spans;
    //no attempt is made to respect sentence boundaries
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);

    //Set the fragmenter on the highlighter
    highlighter.setTextFragmenter(fragmenter);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();

        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);

        String name = doc.get("name");
        String title = doc.get("title");
        bean.setTitle(name);

        String path = doc.get("path");
        bean.setPath(path);

        String cont = doc.get("contents");
        //Create token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        //Get highlighted cont fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {

            dummy.add(frag);
        }

        bean.setContent(dummy);
        beanList.add(bean);
    }

    reader.close();
    dir.close();
    return beanList;
}
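
Because this method must close its resources on every path, including when parsing or searching throws, a try-with-resources shape is a safer sketch for its setup:

    try (Directory dir = FSDirectory.open(Paths.get(Index_Dir));
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // ... parse, search, and highlight as above ...
    } // dir and reader are closed automatically, even on exceptions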

From source file:com.main.Searcher.java

public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException {
    //Get directory reference
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    //IndexReader: an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);
    //An IndexSearcher searches over a single IndexReader
    IndexSearcher searcher = new IndexSearcher(reader);
    //Analyzer with the default stop-word set
    Analyzer analyzer = new StandardAnalyzer();
    //QueryParser turns the query string into a Query over the "contents" field

    String contents = "contents";
    QueryParser parser = new QueryParser(contents, analyzer);

    Query q1 = parser.parse(s1);

    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    bq.add(q1, BooleanClause.Occur.MUST);
    //Build the query and search the index, collecting the top 10 hits
    BooleanQuery booleanQuery = bq.build();
    TopDocs hits = searcher.search(booleanQuery, 10);
    /** Highlighter setup */
    //Uses HTML <B></B> tags to highlight the searched terms
    Formatter formatter = new SimpleHTMLFormatter();
    //Scores content fragments by the number of unique query terms they contain
    //(the matching score, in layman's terms)
    QueryScorer scorer = new QueryScorer(booleanQuery);
    //Marks up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    //Breaks the content into same-size fragments without splitting spans;
    //no attempt is made to respect sentence boundaries
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);

    //Set the fragmenter on the highlighter
    highlighter.setTextFragmenter(fragmenter);
    //Iterate over found results
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);
        String name = doc.get("name");
        String title = doc.get("title");
        bean.setTitle(name);

        String path = doc.get("path");
        bean.setPath(path);

        String cont = doc.get("contents");
        //Create token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        //Get highlighted cont fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {

            dummy.add(frag);
        }

        bean.setContent(dummy);
        beanList.add(bean);
    }

    reader.close();
    dir.close();
    return beanList;
}

From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java

License:Apache License

public void testDefaultOperator() throws Exception {
    Query query = new MultiFieldQueryParser(Version.LUCENE_46, new String[] { "title", "subject" },
            new SimpleAnalyzer(Version.LUCENE_46)).parse("development");

    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs hits = searcher.search(query, 10);

    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Ant in Action"));

    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits,
            "Extreme Programming Explained"));

    dir.close();
}
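
testDefaultOperator exercises the parser's default OR operator; with several query terms, any one of them may match. Requiring all terms instead is a one-line change; a sketch against the same Lucene 4.6 API (the multi-term query string is a hypothetical example):

    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_46,
            new String[] { "title", "subject" },
            new SimpleAnalyzer(Version.LUCENE_46));
    parser.setDefaultOperator(QueryParser.AND_OPERATOR); // every term must match
    Query query = parser.parse("development framework");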

From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java

License:Apache License

public void testSpecifiedOperator() throws Exception {
    Query query = MultiFieldQueryParser.parse(Version.LUCENE_46, "lucene", new String[] { "title", "subject" },
            new BooleanClause.Occur[] { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST },
            new SimpleAnalyzer(Version.LUCENE_46));

    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs hits = searcher.search(query, 10);

    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Lucene in Action, Second Edition"));
    assertEquals("one and only one", 1, hits.scoreDocs.length);

    dir.close();
}

From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java

License:Apache License

public void testMulti() throws Exception {

    MultiReader multiReader = new MultiReader(searchers[0].getIndexReader(), searchers[1].getIndexReader());
    IndexSearcher searcher = new IndexSearcher(multiReader);

    TermRangeQuery query = new TermRangeQuery("animal", // inclusive range "h".."t"
            new BytesRef("h"),
            new BytesRef("t"),
            true, true);

    TopDocs hits = searcher.search(query, 10);
    assertEquals("tarantula not included", 12, hits.totalHits);
}
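
A small aside: the manual BytesRef wrapping above can be avoided with the String convenience factory, assuming the same Lucene 4.x API:

    TermRangeQuery query = TermRangeQuery.newStringRange("animal", "h", "t", true, true);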

From source file:com.mathworks.xzheng.advsearching.TimeLimitingCollectorTest.java

License:Apache License

public void testTimeLimitingCollector() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    Query q = new MatchAllDocsQuery();
    int numAllBooks = TestUtil.hitCount(searcher, q);

    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
    // Stop collecting once ~1000 ms elapse on the clock counter; the shared
    // global counter is used here (the original snippet passed null, which
    // does not provide a usable clock)
    Collector collector = new TimeLimitingCollector(topDocs,
            TimeLimitingCollector.getGlobalCounter(), 1000);
    try {
        searcher.search(q, collector);
        assertEquals(numAllBooks, topDocs.getTotalHits()); // finished within the limit
    } catch (TimeExceededException tee) {
        System.out.println("Too much time taken.");
    }

    dir.close();
}

From source file:com.mathworks.xzheng.analysis.codec.MetaphoneAnalyzerTest.java

License:Apache License

public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("contents", "cool cat", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    Query query = new QueryParser(Version.LUCENE_46, "contents", analyzer).parse("kool kat");

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits); // "kool kat" matches: same Metaphone encoding as "cool cat"
    int docID = hits.scoreDocs[0].doc;
    doc = searcher.doc(docID);
    assertEquals("cool cat", doc.get("contents")); //#D

}