Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

This page collects usage examples for the doc method of org.apache.lucene.search.IndexSearcher.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID): returns the stored fields of the document with the given ID.

Usage

From source file:com.test.LuceneDemo.java

License:Apache License

/**
 * Indexes one document in an in-memory directory, searches for the term
 * "indexed", and checks that exactly one hit comes back whose stored field
 * matches the text that was indexed.
 *
 * @throws IOException if the index cannot be written or read
 * @throws org.apache.lucene.queryparser.classic.ParseException if the query text cannot be parsed
 */
@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    final String fieldName = "fieldname";
    final String text = "This is the text to be indexed.";

    // try-with-resources releases the analyzer, directory, writer and reader even
    // when an assertion or a Lucene call fails part-way through the test.
    // The index is kept in memory; to store it on disk, open an FSDirectory instead.
    try (Analyzer analyzer = new StandardAnalyzer();
            Directory directory = new RAMDirectory()) {

        // Build the index. The writer is closed before the reader below is opened.
        try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new Field(fieldName, text, TextField.TYPE_STORED));
            iwriter.addDocument(doc);
        }

        // Now search the index.
        try (DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);

            // Parse a simple query that searches for "indexed" in the default field.
            QueryParser parser = new QueryParser(fieldName, analyzer);
            Query query = parser.parse("indexed");

            ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            assertEquals(1, hits.length);

            // Load each hit through IndexSearcher.doc and verify the stored value.
            for (ScoreDoc hit : hits) {
                Document hitDoc = isearcher.doc(hit.doc);
                assertEquals(text, hitDoc.get(fieldName));
            }
        }
    }
}

From source file:com.tistory.devyongsik.demo.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Opens the index at a fixed path, parses the query text against the
 * {@code contents} field, runs the search, and prints the doc id, score, path and
 * title of every hit that the search actually returned.
 *
 * @param args optional; {@code args[0]}, when present, is the query text
 * @throws Exception if the index cannot be opened, the query cannot be parsed, or the search fails
 */
public static void main(String[] args) throws Exception {

    String index = "/user/need4spd/Java/lucene_index/"; // 1. directory that holds the index
    String field = "contents"; // 2. default field the query is parsed against
    // 3. query text; previously hard-coded to null, which made parser.parse throw a
    //    NullPointerException on every run.
    String queryString = (args != null && args.length > 0) ? args[0] : null;
    int hitsPerPage = 10; // 4. page size; the search below asks for five pages' worth

    if (queryString == null) {
        System.err.println("Usage: SearchFiles <query>");
        return;
    }

    // 5. Open an IndexSearcher over the on-disk index.
    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index)));
    try {
        // 6. Analyzer used to tokenize the query text.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

        // 7. Build the Query from the query text with a QueryParser.
        QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
        Query query = parser.parse(queryString);

        System.out.println("Query String : " + queryString);
        System.out.println("Query : " + query.toString());
        System.out.println("Searching for: " + query.toString(field));

        // 8. Run the search, asking for at most 5 * hitsPerPage top hits.
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        // totalHits counts every match, but hits holds at most the number requested
        // above, so the loop must be bounded by hits.length to stay inside the array.
        for (int i = 0; i < hits.length; i++) {
            System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); // 9. doc id and score

            Document doc = searcher.doc(hits[i].doc); // 10. load the stored fields of the hit
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
    } finally {
        // Close the searcher even when parsing or searching fails.
        searcher.close();
    }
}

From source file:com.tripod.lucene.service.AbstractLuceneService.java

License:Apache License

/**
 * Loads a document through the searcher, either in full or restricted to a set of fields.
 *
 * <p>The full document is loaded when {@code fieldsToLoad} is null, empty, or contains
 * only the name of {@code Field.ALL_FIELDS}; otherwise only the named fields are loaded.
 *
 * @param searcher the IndexSearcher to load from
 * @param doc the id of the doc to load
 * @param fieldsToLoad the names of the fields to load
 * @return the Document, with all fields or only the requested ones loaded
 * @throws IOException if an error occurs loading the Document
 */
protected Document getDoc(final IndexSearcher searcher, final int doc, final Set<String> fieldsToLoad)
        throws IOException {
    final boolean loadEverything = fieldsToLoad == null
            || fieldsToLoad.isEmpty()
            || (fieldsToLoad.size() == 1 && fieldsToLoad.contains(Field.ALL_FIELDS.getName()));

    return loadEverything ? searcher.doc(doc) : searcher.doc(doc, fieldsToLoad);
}

From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Looks up the first hit for {@code key} under {@code this.timeSort} and completes
 * {@code op} with the object deserialized from that hit's binary content field.
 *
 * <p>The operation is failed with a {@link CancellationException} when no writer is
 * available, and completed without a body when nothing matches the key.
 *
 * @param key value matched against the {@code URI_PARAM_NAME_KEY} term
 * @param op operation to complete or fail with the outcome
 * @throws Throwable if searching, loading or deserializing fails
 */
private void queryIndex(String key, Operation op) throws Throwable {
    final IndexWriter indexWriter = this.writer;
    if (indexWriter == null) {
        op.fail(new CancellationException());
        return;
    }

    final IndexSearcher searcher = updateSearcher(key, indexWriter);
    final Query keyQuery = new TermQuery(new Term(URI_PARAM_NAME_KEY, key));

    // Only the single top hit under timeSort is requested.
    final TopDocs topDocs = searcher.search(keyQuery, 1, this.timeSort, false, false);
    if (topDocs.totalHits == 0) {
        op.complete();
        return;
    }

    final int topDocId = topDocs.scoreDocs[0].doc;
    final Document hit = searcher.doc(topDocId);
    final BytesRef payload = hit.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
    final long updateTime = Long.parseLong(hit.get(URI_PARAM_NAME_UPDATE_TIME));
    final Object body = Utils.fromBytes(payload.bytes, payload.offset, payload.length);

    applyBlobRetentionPolicy(keyQuery, updateTime);
    op.setBodyNoCloning(body).complete();
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Trims the indexed versions of the document identified by {@code link}.
 *
 * <p>When {@code versionsToKeep} is 0, every document matching the self link is
 * deleted. Otherwise a version range is derived from the sorted search results and
 * the documents matching both that range and the self link are deleted.
 *
 * @param delete operation that is failed when no writer or searcher is available and
 *        completed after documents have been deleted
 * @param link self link whose indexed versions are trimmed
 * @param info optional; when non-null its {@code updateMicros} is set to the deletion time
 * @param versionsToKeep retention limit; 0 means delete everything for the link
 * @throws Throwable if searching or deleting fails
 */
private void deleteDocumentsFromIndex(Operation delete, String link, SelfLinkInfo info, long versionsToKeep)
        throws Throwable {
    IndexWriter wr = this.writer;
    if (wr == null) {
        delete.fail(new CancellationException());
        return;
    }

    Query linkQuery = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, link));

    IndexSearcher s = updateSearcher(link, Integer.MAX_VALUE, wr);
    if (s == null) {
        delete.fail(new CancellationException());
        return;
    }

    TopDocs results;

    // Fetch every indexed version for the link, ordered by this.versionSort.
    results = s.search(linkQuery, Integer.MAX_VALUE, this.versionSort, false, false);
    // NOTE(review): this and the following early returns leave `delete` neither
    // completed nor failed - confirm the caller handles that.
    if (results == null) {
        return;
    }

    ScoreDoc[] hits = results.scoreDocs;

    if (hits == null || hits.length == 0) {
        return;
    }

    // NOTE(review): this loaded document is overwritten below before it is read, so
    // the load looks redundant - confirm before removing.
    Document hitDoc = s.doc(hits[0].doc);

    if (versionsToKeep == 0) {
        // we are asked to delete everything, no need to sort or query
        this.selfLinks.remove(link);
        wr.deleteDocuments(linkQuery);
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        delete.complete();
        return;
    }

    // Fewer versions than the retention limit: nothing is deleted.
    if (hits.length < versionsToKeep) {
        return;
    }

    BooleanQuery bq = new BooleanQuery();
    // grab the document at the tail of the results, and use it to form a new query
    // that will delete all documents from that document up to the version at the
    // retention
    // limit
    hitDoc = s.doc(hits[hits.length - 1].doc);
    long versionLowerBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));
    hitDoc = s.doc(hits[(int) versionsToKeep - 1].doc);
    long versionUpperBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    // NOTE(review): the two `true` arguments presumably make both bounds inclusive,
    // which would also match the document at index versionsToKeep - 1 - confirm this
    // is intended given the order produced by versionSort.
    NumericRangeQuery<Long> versionQuery = NumericRangeQuery.newLongRange(ServiceDocument.FIELD_NAME_VERSION,
            versionLowerBound, versionUpperBound, true, true);

    // Both clauses are required: a version inside the range AND the same self link.
    bq.add(versionQuery, Occur.MUST);
    bq.add(linkQuery, Occur.MUST);
    // This search only feeds the log message below; the delete uses the query itself.
    results = s.search(bq, Integer.MAX_VALUE);
    long now = Utils.getNowMicrosUtc();
    logInfo("trimming index for %s from %d to %d, query returned %d", link, hits.length, versionsToKeep,
            results.totalHits);
    wr.deleteDocuments(bq);
    if (info != null) {
        info.updateMicros = now;
    }
    this.indexUpdateTimeMicros = now;
    delete.complete();
}

From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java

License:Apache License

/**
 * Runs one query per search item carried by {@code id} and collects the deserialized
 * {@code GEMFIRE_KEY} value of every matching document.
 *
 * <p>Search items are only available when {@code id} is a {@code SearcheCriteria};
 * for any other id, or when the criteria carries no items, an empty result is
 * returned instead of failing with a NullPointerException.
 *
 * @param id search criteria holding the field and the list of items to search for
 * @param singleRecordRequired when true, at most one key is collected per search item
 * @return the collected keys; empty when nothing matched or the search failed
 */
private Iterable searchStoreForMultipleItems(Serializable id, boolean singleRecordRequired) {
    List<Object> results = new ArrayList<Object>();

    IndexSearcher searcher = searchManager.acquire();
    try {
        // Extract search criteria
        String field = defaultFeild;
        List<String> searchItems = null;
        if (id instanceof SearcheCriteria) {
            SearcheCriteria key = (SearcheCriteria) id;
            field = key.getSearchField();
            searchItems = key.getSearchItems();
        }

        // Nothing to search for; the finally block still releases the searcher.
        if (searchItems == null) {
            return results;
        }

        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);

        for (String searchText : searchItems) {
            if (searchText == null) {
                // A null item cannot be parsed; skip it rather than abort the whole search.
                continue;
            }
            Query query = parser.parse(searchText);
            TopDocs docs = searcher.search(query, Integer.MAX_VALUE);

            // Iterate over the hits that were actually returned rather than
            // indexing scoreDocs by totalHits.
            for (ScoreDoc hit : docs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                // NOTE(review): only the backing array is passed on; the BytesRef
                // offset and length are ignored - confirm they always span the array.
                Object gfKey = ObjectSerializer.deserialize(doc.getBinaryValue(GEMFIRE_KEY).bytes);

                results.add(gfKey);
                if (singleRecordRequired) {
                    // Ends the hits of this item only; later items are still searched.
                    break;
                }
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    return results;
}

From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java

License:Apache License

/**
 * Runs a single query derived from {@code id} and collects the deserialized
 * {@code GEMFIRE_KEY} value of every matching document.
 *
 * <p>A {@code SearcheCriteria} supplies both the field and the text; a plain
 * {@code String} is used as the text against the default field. For any other id
 * type, or when no text is available, an empty result is returned instead of failing
 * with a NullPointerException inside the query parser.
 *
 * @param id a {@code SearcheCriteria} or a {@code String} holding the search text
 * @param singleRecordRequired when true, only the first matching key is collected
 * @return the collected keys; empty when nothing matched or the search failed
 */
private Iterable searchStore(Serializable id, boolean singleRecordRequired) {
    List<Object> results = new ArrayList<Object>();

    IndexSearcher searcher = searchManager.acquire();
    try {
        // Extract search criteria
        String field = defaultFeild;
        String searchText = null;
        if (id instanceof SearcheCriteria) {
            SearcheCriteria key = (SearcheCriteria) id;
            field = key.getSearchField();
            searchText = key.getSearchText();
        } else if (id instanceof String) {
            searchText = (String) id;
        }

        // Nothing to search for; the finally block still releases the searcher.
        if (searchText == null) {
            return results;
        }

        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        Query query = parser.parse(searchText);
        TopDocs docs = searcher.search(query, Integer.MAX_VALUE);

        // Iterate over the hits that were actually returned rather than
        // indexing scoreDocs by totalHits.
        for (ScoreDoc hit : docs.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            // NOTE(review): only the backing array is passed on; the BytesRef
            // offset and length are ignored - confirm they always span the array.
            Object gfKey = ObjectSerializer.deserialize(doc.getBinaryValue(GEMFIRE_KEY).bytes);

            results.add(gfKey);

            if (singleRecordRequired) {
                break;
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    return results;
}

From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java

License:Apache License

/**
 * Collects the deserialized {@code GEMFIRE_KEY} value of every document matching the
 * query built from {@code savedField} and {@code savedFieldValue}.
 *
 * <p>Any exception raised while searching is printed and whatever was collected up to
 * that point is returned. The searcher is released in all cases.
 *
 * @return the collected keys
 */
@Override
public Iterable findAll() {
    ArrayList keys = new ArrayList();

    IndexSearcher indexSearcher = searchManager.acquire();
    try {
        Query savedQuery = new QueryParser(Version.LUCENE_40, savedField, analyzer).parse(savedFieldValue);
        TopDocs topDocs = indexSearcher.search(savedQuery, Integer.MAX_VALUE);

        // A zero totalHits simply skips the loop.
        for (int n = 0; n < topDocs.totalHits; n++) {
            ScoreDoc match = topDocs.scoreDocs[n];
            Document stored = indexSearcher.doc(match.doc);
            keys.add(ObjectSerializer.deserialize(stored.getBinaryValue(GEMFIRE_KEY).bytes));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(indexSearcher);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    return keys;
}

From source file:com.vmware.xenon.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Looks up the first hit for {@code key} under {@code this.timeSort} and completes
 * {@code op} with the object deserialized from that hit's binary content field.
 *
 * <p>The operation is failed with a {@link CancellationException} when no writer is
 * available, and completed without a body when nothing matches the key.
 *
 * @param key value matched against the {@code URI_PARAM_NAME_KEY} term
 * @param op operation to complete or fail with the outcome
 * @throws Throwable if searching, loading or deserializing fails
 */
private void queryIndex(String key, Operation op) throws Throwable {
    final IndexWriter indexWriter = this.writer;
    if (indexWriter == null) {
        op.fail(new CancellationException());
        return;
    }

    final IndexSearcher searcher = updateSearcher(indexWriter);
    final Query keyQuery = new TermQuery(new Term(URI_PARAM_NAME_KEY, key));

    // Only the single top hit under timeSort is requested.
    final TopDocs topDocs = searcher.search(keyQuery, 1, this.timeSort, false, false);
    if (topDocs.totalHits == 0) {
        op.complete();
        return;
    }

    final int topDocId = topDocs.scoreDocs[0].doc;
    final Document hit = searcher.doc(topDocId);
    final BytesRef payload = hit.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
    final long updateTime = Long.parseLong(hit.get(URI_PARAM_NAME_UPDATE_TIME));
    final Object body = Utils.fromBytes(payload.bytes, payload.offset, payload.length);

    applyBlobRetentionPolicy(keyQuery, updateTime);
    op.setBodyNoCloning(body).complete();
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Trims the indexed versions of the document identified by {@code link}.
 *
 * <p>When {@code versionsToKeep} is 0, every document matching the self link is
 * deleted. Otherwise, when more than {@code versionsToKeep} versions are found, the
 * documents whose version lies between the version of the last search hit and the
 * version of the hit at index {@code versionsToKeep} are deleted.
 *
 * @param delete operation that is failed when no writer or searcher is available and
 *        completed after documents have been deleted
 * @param link self link whose indexed versions are trimmed
 * @param versionsToKeep retention limit; 0 means delete everything for the link
 * @throws Throwable if searching or deleting fails
 */
private void deleteDocumentsFromIndex(Operation delete, String link, long versionsToKeep) throws Throwable {
    IndexWriter wr = this.writer;
    if (wr == null) {
        delete.fail(new CancellationException());
        return;
    }

    Query linkQuery = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, link));

    IndexSearcher s = updateSearcher(link, Integer.MAX_VALUE, wr);
    if (s == null) {
        delete.fail(new CancellationException());
        return;
    }

    TopDocs results;

    // Fetch every indexed version for the link, ordered by this.versionSort.
    results = s.search(linkQuery, Integer.MAX_VALUE, this.versionSort, false, false);
    // NOTE(review): this and the following early returns leave `delete` neither
    // completed nor failed - confirm the caller handles that.
    if (results == null) {
        return;
    }

    ScoreDoc[] hits = results.scoreDocs;

    if (hits == null || hits.length == 0) {
        return;
    }

    Document hitDoc;

    if (versionsToKeep == 0) {
        // we are asked to delete everything, no need to sort or query
        wr.deleteDocuments(linkQuery);
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        delete.complete();
        return;
    }

    int versionCount = hits.length;

    // Version of the last hit: lower bound of the delete range.
    hitDoc = s.doc(hits[versionCount - 1].doc);
    long versionLowerBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    // Version of the first hit: used only for logging and the sanity check below.
    hitDoc = s.doc(hits[0].doc);
    long versionUpperBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    // If the number of versions found are already less than the limit
    // then there is nothing to delete. Just exit.
    if (versionCount <= versionsToKeep) {
        return;
    }

    BooleanQuery.Builder builder = new BooleanQuery.Builder();

    // grab the document at the tail of the results, and use it to form a new query
    // that will delete all documents from that document up to the version at the
    // retention limit
    hitDoc = s.doc(hits[(int) versionsToKeep].doc);
    long cutOffVersion = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    // NOTE(review): presumably both bounds of this range are inclusive - confirm
    // against the LongPoint documentation.
    Query versionQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_VERSION, versionLowerBound,
            cutOffVersion);

    // Both clauses are required: a version inside the range AND the same self link.
    builder.add(versionQuery, Occur.MUST);
    builder.add(linkQuery, Occur.MUST);
    BooleanQuery bq = builder.build();

    // This search only feeds the log message below; the delete uses the query itself.
    results = s.search(bq, Integer.MAX_VALUE);

    logInfo("Version grooming for %s found %d versions from %d to %d. Trimming %d versions from %d to %d", link,
            versionCount, versionLowerBound, versionUpperBound, results.scoreDocs.length, versionLowerBound,
            cutOffVersion);

    wr.deleteDocuments(bq);

    // We have observed that sometimes Lucene search does not return all the document
    // versions in the index. Normally, the number of documents returned should be
    // equal to or more than the delta between the lower and upper versions. It can be more
    // because of duplicate document versions. If that's not the case, we add the
    // link back for retention so that the next grooming run can cleanup the missed document.
    if (versionCount < versionUpperBound - versionLowerBound + 1) {
        logWarning(
                "Adding %s back for version grooming since versionCount %d "
                        + "was lower than version delta from %d to %d.",
                link, versionCount, versionLowerBound, versionUpperBound);
        synchronized (this.linkDocumentRetentionEstimates) {
            this.linkDocumentRetentionEstimates.put(link, versionsToKeep);
        }
    }

    long now = Utils.getNowMicrosUtc();

    // Use time AFTER index was updated to be sure that it can be compared
    // against the time the searcher was updated and have this change
    // be reflected in the new searcher. If the start time would be used,
    // it is possible to race with updating the searcher and NOT have this
    // change be reflected in the searcher.
    updateLinkAccessTime(now, link);

    delete.complete();
}