Example usage for org.apache.lucene.search.IndexSearcher#doc

List of usage examples for org.apache.lucene.search.IndexSearcher#doc

Introduction

On this page you can find example usages of the doc method of org.apache.lucene.search.IndexSearcher.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:com.sg.business.vault.index.demo.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * <p>When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 *
 * <p>End of input on {@code in} is treated like an empty answer (stop / quit),
 * and a page number that is not a positive integer within range is answered
 * with "No such page" instead of an exception.
 *
 * @param in          reader the interactive answers are read from
 * @param searcher    searcher used to run the query and load the hit documents
 * @param query       query to execute
 * @param hitsPerPage number of hits shown per page
 * @param raw         if {@code true}, print only doc id and score for each hit
 * @param interactive if {@code false}, print the first page and return
 * @throws IOException if searching, loading a document or reading from {@code in} fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents"); //$NON-NLS-1$

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " //$NON-NLS-1$ //$NON-NLS-2$
                    + numTotalHits + " total matching documents collected."); //$NON-NLS-1$
            System.out.println("Collect more (y/n) ?"); //$NON-NLS-1$
            String line = in.readLine();
            // readLine() returns null at end of stream; handle it like an empty answer
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            // The user wants to go past the first 5 pages: re-run the query for all hits
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" //$NON-NLS-1$ //$NON-NLS-2$
                        + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("filename"); //$NON-NLS-1$
            if (path != null) {
                System.out.println((i + 1) + ". " + path); //$NON-NLS-1$
                String title = doc.get("title"); //$NON-NLS-1$
                if (title != null) {
                    System.out.println("   Title: " + title); //$NON-NLS-1$
                }
            } else {
                System.out.println((i + 1) + ". " //$NON-NLS-1$
                        + "No path for this document"); //$NON-NLS-1$
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press "); //$NON-NLS-1$
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, "); //$NON-NLS-1$
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, "); //$NON-NLS-1$
                }
                System.out.println("(q)uit or enter number to jump to a page."); //$NON-NLS-1$

                String line = in.readLine();
                // A null line means the input stream ended; quit instead of failing
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException notANumber) {
                        // Anything that is not a number cannot name a page; 0 is rejected below
                        page = 0;
                    }
                    // long arithmetic so a huge page number cannot overflow into a "valid" offset
                    long offset = (long) (page - 1) * hitsPerPage;
                    if (page >= 1 && offset < numTotalHits) {
                        start = (int) offset;
                        break;
                    } else {
                        System.out.println("No such page"); //$NON-NLS-1$
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorAnyExample.java

License:Apache License

/**
 * Runs {@code q} for at most ten hits and prints, for every hit, its doc id
 * together with the value of the {@code TEXT_FIELD} field of the loaded document.
 */
private static void search(IndexSearcher searcher, Query q) throws IOException {
    System.out.println(format("Searching for [%s]:", q));
    final TopDocs topTen = searcher.search(q, 10);
    final ScoreDoc[] hits = topTen.scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        final int docId = hits[i].doc;
        final String text = searcher.doc(docId).get(TEXT_FIELD);
        System.out.println(format("  doc=%d, text=%s", docId, text));
    }
}

From source file:com.shaie.facet.NotDrillDownExample.java

License:Apache License

/**
 * Creates the index, searches for all documents that do not carry the
 * Author/Lisa facet and prints the first hit.
 */
public static void main(String[] args) throws Exception {
    createIndex();

    try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
            TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
        final IndexSearcher searcher = new IndexSearcher(indexReader);

        // Name of the index field which holds the 'Author' facets
        final String indexedField = config.getDimConfig(AUTHOR_FACET).indexFieldName;

        // Matches every document tagged Author/Lisa; used as the exclusion clause
        final Query lisaOnly = new TermQuery(DrillDownQuery.term(indexedField, AUTHOR_FACET, "Lisa"));

        final BooleanQuery.Builder notLisa = new BooleanQuery.Builder();
        // Here you would usually use a different base query
        notLisa.add(new MatchAllDocsQuery(), Occur.MUST);
        notLisa.add(lisaOnly, Occur.MUST_NOT);
        final Query q = notLisa.build();

        final TopDocs found = searcher.search(q, 10);
        assert found.scoreDocs.length == 1 : "should have found 1 document with Author/Bob";

        final int firstDocId = found.scoreDocs[0].doc;
        final Document doc = searcher.doc(firstDocId);
        System.out.println(doc);
    }
}

From source file:com.sindicetech.siren.demo.SimpleSearcher.java

License:Open Source License

/**
 * Loads the document with the given id using a searcher borrowed from {@code mgr}.
 * The searcher is handed back to {@code mgr} whether or not the lookup succeeds.
 *
 * @param docID id of the document to load
 * @return the document returned by the searcher for {@code docID}
 * @throws IOException if acquiring the searcher or loading the document fails
 */
public Document retrieve(final int docID) throws IOException {
    final IndexSearcher borrowed = mgr.acquire();
    try {
        final Document loaded = borrowed.doc(docID);
        return loaded;
    } finally {
        // Always give the searcher back, even when doc() throws
        mgr.release(borrowed);
    }
}

From source file:com.slieer.app.lecene3x.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * Demo: indexes three small documents into an in-memory directory using the
 * IK analyzer, then parses a keyword query and prints the matching documents.
 *
 * <p>Failures are reported with a stack trace; writer, reader and directory
 * are closed on every path.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // Name of the Lucene document field that holds the text
    String fieldName = "text";
    // Contents to index
    String text = "IK Analyzer???????";
    String text1 = "? (Chinese Word Segmentation) ???????????";
    String text2 = "?????,,??,?";

    // Create the IK analyzer
    // NOTE(review): the 'true' argument presumably selects smart segmentation - confirm against IKAnalyzer docs
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    try {
        // In-memory index
        directory = new RAMDirectory();

        // Configure and open the index writer
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Write the three documents
        Document doc = new Document();
        Field strField = new StringField("ID", "10000", Field.Store.YES);
        Field textFild = new StringField(fieldName, text, Field.Store.YES);
        doc.add(strField);
        doc.add(textFild);
        iwriter.addDocument(doc);

        doc = new Document();
        strField = new StringField("ID", "10001", Field.Store.YES);
        textFild = new StringField(fieldName, text1, Field.Store.YES);
        doc.add(strField);
        doc.add(textFild);
        iwriter.addDocument(doc);

        doc = new Document();
        strField = new StringField("ID", "10002", Field.Store.YES);
        textFild = new StringField(fieldName, text2, Field.Store.YES);
        doc.add(strField);
        doc.add(textFild);
        iwriter.addDocument(doc);

        iwriter.close();
        // Mark the writer as closed so the finally block does not close it again
        iwriter = null;

        // ---- Search ----
        ireader = DirectoryReader.open(directory);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // Build the Query with a QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_4_9, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Fetch the 5 best-scoring hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);

        // totalHits counts ALL matches, while scoreDocs holds at most the 5
        // requested above, so iterate over the array that was actually returned.
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document targetDoc = isearcher.doc(scoreDoc.doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // Still non-null only when indexing failed before the regular close()
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.spd.ukraine.lucenewebsearch1.web.IndexingController.java

/**
 * Searches the index for {@code phrase} in the given field and converts the
 * hits into {@code WebPage} objects.
 *
 * <p>A reader is opened on {@code directory} for every call and is closed
 * again before the method returns.
 *
 * @param phrase    query text to parse and search for; {@code null} yields an empty result
 * @param fieldName default field the query parser searches in
 * @return the hits in result order, copied into a {@code LinkedHashSet}
 *         (so duplicates according to {@code WebPage}'s equality are dropped);
 *         an empty list when {@code phrase} is {@code null}
 * @throws java.io.IOException if the index cannot be opened or read
 * @throws org.apache.lucene.queryparser.classic.ParseException if {@code phrase} cannot be parsed
 */
public Collection<WebPage> searchPhrase(String phrase, String fieldName) throws IOException, ParseException {
    if (null == phrase) {
        return new ArrayList<>();
    }
    List<WebPage> searchResults = new ArrayList<>();
    // try-with-resources: the reader opened here was previously never closed
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        QueryParser parser = new QueryParser(Version.LUCENE_43, fieldName,
                new StandardAnalyzer(Version.LUCENE_43));
        org.apache.lucene.search.Query query = parser.parse(phrase);
        TopDocs topDocs = searcher.search(query, MAX_HITS);
        ScoreDoc[] hits = topDocs.scoreDocs;
        for (ScoreDoc hit : hits) {
            int docId = hit.doc;
            org.apache.lucene.document.Document d = searcher.doc(docId);
            System.out.println("'" + d.get(URL_FIELD) + "' '" + d.get(TITLE_FIELD) + "'");
            WebPage webPage = new WebPage();
            webPage.setUrl(d.get(URL_FIELD));
            webPage.setTitle(highLightPhrase(d.get(TITLE_FIELD), phrase));
            webPage.setContent(truncateText(d.get(CONTENT_FIELD), phrase));
            searchResults.add(webPage);
        }
        System.out.println("Found " + hits.length);
    }
    return new LinkedHashSet<>(searchResults);
}

From source file:com.svenjacobs.lugaene.GaeDirectoryTest.java

License:Apache License

/**
 * Round trip through a {@code GaeDirectory}: writes two documents, then checks
 * that a boolean query and two single-term queries return the expected titles.
 */
@Test
public void wholeCycle() throws Exception {

    // ---- Index ----

    final Directory dir = new GaeDirectory("Test");
    final Analyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_44);
    final IndexWriter writer = new IndexWriter(dir,
            GaeIndexWriterConfigHelper.create(Version.LUCENE_44, standardAnalyzer));

    final Document first = new Document();
    first.add(new StringField(FIELD_TITLE, "Title1", Field.Store.YES));
    first.add(new TextField(FIELD_CONTENTS, "keyword1 keyword2 lorem ipsum", Field.Store.NO));
    writer.addDocument(first);

    final Document second = new Document();
    second.add(new StringField(FIELD_TITLE, "Title2", Field.Store.YES));
    second.add(new TextField(FIELD_CONTENTS, "keyword3 keyword4 lorem ipsum", Field.Store.NO));
    writer.addDocument(second);

    writer.close();

    // ---- Search ----

    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));

    // Title1 AND "lorem": only the first document satisfies both clauses
    final BooleanQuery titleAndLorem = new BooleanQuery();
    titleAndLorem.add(new TermQuery(new Term(FIELD_TITLE, "Title1")), BooleanClause.Occur.MUST);
    titleAndLorem.add(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), BooleanClause.Occur.MUST);

    final ScoreDoc[] bothClauses = searcher.search(titleAndLorem, 100).scoreDocs;
    assertThat(bothClauses.length, is(1));
    assertThat(searcher.doc(bothClauses[0].doc).get(FIELD_TITLE), is("Title1"));

    // "lorem" alone occurs in both documents
    final TermQuery loremOnly = new TermQuery(new Term(FIELD_CONTENTS, "lorem"));
    final ScoreDoc[] loremHits = searcher.search(loremOnly, 100).scoreDocs;
    assertThat(loremHits.length, is(2));
    assertThat(searcher.doc(loremHits[0].doc).get(FIELD_TITLE), is("Title1"));
    assertThat(searcher.doc(loremHits[1].doc).get(FIELD_TITLE), is("Title2"));

    // "keyword3" occurs only in the second document
    final TermQuery keyword3Only = new TermQuery(new Term(FIELD_CONTENTS, "keyword3"));
    final ScoreDoc[] keyword3Hits = searcher.search(keyword3Only, 100).scoreDocs;
    assertThat(keyword3Hits.length, is(1));
    assertThat(searcher.doc(keyword3Hits[0].doc).get(FIELD_TITLE), is("Title2"));
}

From source file:com.sxc.lucene.analysis.codec.MetaphoneAnalyzerTest.java

License:Apache License

/**
 * Indexes "cool cat" with {@code MetaphoneReplacementAnalyzer} and verifies that
 * the differently spelled query "kool kat" still finds that document.
 */
public void testKoolKat() throws Exception {
    final RAMDirectory ramDir = new RAMDirectory();
    final Analyzer metaphone = new MetaphoneReplacementAnalyzer();

    // Index a single document containing the original spelling
    final IndexWriter indexWriter = new IndexWriter(ramDir,
            new IndexWriterConfig(Version.LUCENE_47, metaphone));
    final Document indexed = new Document();
    indexed.add(new TextField("contents", "cool cat", Field.Store.YES));
    indexWriter.addDocument(indexed);
    indexWriter.close();

    // Query with the alternative spelling, analyzed by the same analyzer
    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(ramDir));
    final QueryParser parser = new QueryParser(Version.LUCENE_47, "contents", metaphone);
    final Query koolKat = parser.parse("kool kat");
    final TopDocs hits = searcher.search(koolKat, 1);
    assertEquals(1, hits.totalHits);

    final Document found = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals("cool cat", found.get("contents"));
    searcher.getIndexReader().close();
}

From source file:com.sxc.lucene.searching.BasicSearchingTest.java

License:Apache License

/**
 * Parses a two-term OR query against the "contents" field and checks the hit
 * count and the "country" value of the first hit.
 */
public void testQueryParser() throws Exception {
    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    // NOTE(review): the query text and the expected country look like non-ASCII
    // text lost in extraction - confirm the literals against the original source.
    final Query parsed = new QueryParser(Version.LUCENE_47, "contents",
            new SmartChineseAnalyzer(Version.LUCENE_47)).parse("* OR *");

    final TopDocs found = searcher.search(parsed, 10);
    assertEquals(2, found.totalHits);

    final int firstDocId = found.scoreDocs[0].doc;
    final Document top = searcher.doc(firstDocId);
    assertEquals("", top.get("country"));

    directory.close();
}

From source file:com.tamingtext.fuzzy.OverlapMeasures.java

License:Apache License

/**
 * Indexes every string in {@code terms} as its own document in a temporary
 * in-memory index, prints the indexed values, and then searches that index
 * for {@code queryTerm}.
 *
 * <p>Text is tokenized with a {@code PatternTokenizer} using the pattern
 * {@code "."} and group 0.
 *
 * @param queryTerm text handed to the query parser for the "chars" field
 * @param n         maximum number of hits to return
 * @param terms     candidate strings to index
 * @return the top {@code n} hits for {@code queryTerm}
 * @throws IOException    if the index cannot be written or read
 * @throws ParseException if {@code queryTerm} cannot be parsed
 */
public TopDocs cosine(String queryTerm, int n, String... terms) throws IOException, ParseException {
    Directory directory = new RAMDirectory();
    final Pattern pattern = Pattern.compile(".");
    Analyzer analyzer = new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
            try {
                return new PatternTokenizer(reader, pattern, 0);
            } catch (IOException e) {
                // Previously swallowed, which returned a null TokenStream to the caller.
                // Fail at the source instead and keep the cause.
                throw new IllegalStateException("Could not create tokenizer for field " + fieldName, e);
            }
        }
    };
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    IndexWriter writer = new IndexWriter(directory, conf);
    for (String term : terms) {
        Document doc = new Document();
        doc.add(new Field("chars", term, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
    writer.close();

    IndexReader reader = IndexReader.open(directory);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), terms.length);
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            System.out.println("Id: " + topDocs.scoreDocs[i].doc + " Val: "
                    + searcher.doc(topDocs.scoreDocs[i].doc).get("chars"));
        }
        QueryParser qp = new QueryParser(Version.LUCENE_36, "chars", analyzer);
        Query query = qp.parse(queryTerm);
        return searcher.search(query, n);
    } finally {
        // The reader is local to this method and was never closed before
        reader.close();
    }
}