List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID): loads the stored fields of the document with the given docID from the searcher's underlying IndexReader.
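Before the full source listings, here is a minimal sketch of the call in isolation. It assumes an already-populated Directory and a stored field named "fieldname" containing the term "indexed" (the same setup the first example below builds); the method name printMatches is illustrative only.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;

// Minimal sketch (not taken from the listings below): run a query, then load
// each hit's stored fields with IndexSearcher.doc(int).
static void printMatches(Directory directory) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Assumed field/term for illustration.
        Query query = new TermQuery(new Term("fieldname", "indexed"));
        TopDocs top = searcher.search(query, 10);
        for (ScoreDoc sd : top.scoreDocs) {
            Document stored = searcher.doc(sd.doc); // equivalent to reader.document(sd.doc)
            System.out.println(stored.get("fieldname"));
        }
    }
}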
From source file:com.test.LuceneDemo.java
License:Apache License
@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter iwriter = new IndexWriter(directory, config);
    Document doc = new Document();
    String text = "This is the text to be indexed.";
    doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
    iwriter.addDocument(doc);
    iwriter.close();

    // Now search the index:
    DirectoryReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.parse("indexed");
    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }
    ireader.close();
    directory.close();
}
From source file:com.tistory.devyongsik.demo.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String index = "/user/need4spd/Java/lucene_index/"; // 1. index directory
    String field = "contents";                          // 2. field to search
    String queryString = null;                          // 3. query string to search for
    int hitsPerPage = 10;                               // 4. number of hits per page

    // 5. Create the IndexSearcher over the on-disk index.
    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index)));
    // 6. Create the Analyzer.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

    // 7. Build the Query with a QueryParser.
    QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
    Query query = parser.parse(queryString);
    System.out.println("Query String : " + queryString);
    System.out.println("Query : " + query.toString());
    System.out.println("Searching for: " + query.toString(field));

    // 8. Run the search; hits are ranked by score (TF-IDF).
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    for (int i = 0; i < numTotalHits; i++) {
        System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

        // 9. Load the stored fields of the hit.
        Document doc = searcher.doc(hits[i].doc);

        // 10. Read individual stored fields.
        String path = doc.get("path");
        if (path != null) {
            System.out.println((i + 1) + ". " + path);
            String title = doc.get("title");
            if (title != null) {
                System.out.println("   Title: " + doc.get("title"));
            }
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
        }
    }
    searcher.close();
}
From source file:com.tripod.lucene.service.AbstractLuceneService.java
License:Apache License
/**
 * @param searcher the IndexSearcher
 * @param doc the doc to load
 * @param fieldsToLoad the fields of the doc to load
 * @return the Document with the given fields loaded
 * @throws IOException if an error occurs loading the Document
 */
protected Document getDoc(final IndexSearcher searcher, final int doc, final Set<String> fieldsToLoad)
        throws IOException {
    if (fieldsToLoad == null || fieldsToLoad.size() == 0
            || (fieldsToLoad.size() == 1 && fieldsToLoad.contains(Field.ALL_FIELDS.getName()))) {
        return searcher.doc(doc);
    } else {
        return searcher.doc(doc, fieldsToLoad);
    }
}
From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java
License:Open Source License
private void queryIndex(String key, Operation op) throws Throwable {
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    IndexSearcher s = updateSearcher(key, w);
    Query linkQuery = new TermQuery(new Term(URI_PARAM_NAME_KEY, key));
    TopDocs hits = s.search(linkQuery, 1, this.timeSort, false, false);
    if (hits.totalHits == 0) {
        op.complete();
        return;
    }
    Document hitDoc = s.doc(hits.scoreDocs[0].doc);
    BytesRef content = hitDoc.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
    long updateTime = Long.parseLong(hitDoc.get(URI_PARAM_NAME_UPDATE_TIME));
    Object hydratedInstance = Utils.fromBytes(content.bytes, content.offset, content.length);
    applyBlobRetentionPolicy(linkQuery, updateTime);
    op.setBodyNoCloning(hydratedInstance).complete();
}
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
/**
 * Deletes indexed document versions for the specified self link, keeping at most
 * the requested number of versions.
 *
 * @throws Throwable
 */
private void deleteDocumentsFromIndex(Operation delete, String link, SelfLinkInfo info, long versionsToKeep)
        throws Throwable {
    IndexWriter wr = this.writer;
    if (wr == null) {
        delete.fail(new CancellationException());
        return;
    }

    Query linkQuery = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, link));

    IndexSearcher s = updateSearcher(link, Integer.MAX_VALUE, wr);
    if (s == null) {
        delete.fail(new CancellationException());
        return;
    }

    TopDocs results;
    results = s.search(linkQuery, Integer.MAX_VALUE, this.versionSort, false, false);
    if (results == null) {
        return;
    }

    ScoreDoc[] hits = results.scoreDocs;
    if (hits == null || hits.length == 0) {
        return;
    }

    Document hitDoc = s.doc(hits[0].doc);

    if (versionsToKeep == 0) {
        // we are asked to delete everything, no need to sort or query
        this.selfLinks.remove(link);
        wr.deleteDocuments(linkQuery);
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        delete.complete();
        return;
    }

    if (hits.length < versionsToKeep) {
        return;
    }

    BooleanQuery bq = new BooleanQuery();

    // grab the document at the tail of the results, and use it to form a new query
    // that will delete all documents from that document up to the version at the
    // retention limit
    hitDoc = s.doc(hits[hits.length - 1].doc);
    long versionLowerBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    hitDoc = s.doc(hits[(int) versionsToKeep - 1].doc);
    long versionUpperBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    NumericRangeQuery<Long> versionQuery = NumericRangeQuery.newLongRange(
            ServiceDocument.FIELD_NAME_VERSION, versionLowerBound, versionUpperBound, true, true);

    bq.add(versionQuery, Occur.MUST);
    bq.add(linkQuery, Occur.MUST);

    results = s.search(bq, Integer.MAX_VALUE);

    long now = Utils.getNowMicrosUtc();
    logInfo("trimming index for %s from %d to %d, query returned %d", link, hits.length, versionsToKeep,
            results.totalHits);

    wr.deleteDocuments(bq);

    if (info != null) {
        info.updateMicros = now;
    }

    this.indexUpdateTimeMicros = now;
    delete.complete();
}
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
private Iterable searchStoreForMultipleItems(Serializable id, boolean singleRecordRequired) {
    ArrayList results = new ArrayList();
    IndexSearcher searcher = searchManager.acquire();
    try {
        // Extract search criteria
        String field = defaultFeild;
        List<String> searchItems = null;
        if (id instanceof SearcheCriteria) {
            SearcheCriteria key = (SearcheCriteria) id;
            field = key.getSearchField();
            searchItems = key.getSearchItems();
        }

        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        for (String searchText : searchItems) {
            Query query = parser.parse(searchText);
            TopDocs docs = searcher.search(query, Integer.MAX_VALUE);

            if (docs.totalHits > 0) {
                for (int i = 0; i < docs.totalHits; i++) {
                    ScoreDoc hit = docs.scoreDocs[i];
                    Document doc = searcher.doc(hit.doc);
                    Object gfKey = ObjectSerializer.deserialize(doc.getBinaryValue(GEMFIRE_KEY).bytes);
                    results.add(gfKey);
                    if (singleRecordRequired)
                        break;
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(searcher);
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    searcher = null;
    return results;
}
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
private Iterable searchStore(Serializable id, boolean singleRecordRequired) {
    ArrayList results = new ArrayList();
    IndexSearcher searcher = searchManager.acquire();
    try {
        // Extract search criteria
        String field = defaultFeild;
        String searchText = null;
        if (id instanceof SearcheCriteria) {
            SearcheCriteria key = (SearcheCriteria) id;
            field = key.getSearchField();
            searchText = key.getSearchText();
        } else if (id instanceof String) {
            searchText = (String) id;
        }

        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        Query query = parser.parse(searchText);
        TopDocs docs = searcher.search(query, Integer.MAX_VALUE);

        if (docs.totalHits > 0) {
            for (int i = 0; i < docs.totalHits; i++) {
                ScoreDoc hit = docs.scoreDocs[i];
                Document doc = searcher.doc(hit.doc);
                Object gfKey = ObjectSerializer.deserialize(doc.getBinaryValue(GEMFIRE_KEY).bytes);
                results.add(gfKey);
                if (singleRecordRequired)
                    break;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    searcher = null;
    return results;
}
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
@Override
public Iterable findAll() {
    ArrayList results = new ArrayList();
    IndexSearcher searcher = searchManager.acquire();
    try {
        QueryParser parser = new QueryParser(Version.LUCENE_40, savedField, analyzer);
        Query query = parser.parse(savedFieldValue);
        TopDocs docs = searcher.search(query, Integer.MAX_VALUE);

        if (docs.totalHits > 0) {
            for (int i = 0; i < docs.totalHits; i++) {
                ScoreDoc hit = docs.scoreDocs[i];
                Document doc = searcher.doc(hit.doc);
                Object gfKey = ObjectSerializer.deserialize(doc.getBinaryValue(GEMFIRE_KEY).bytes);
                results.add(gfKey);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            searchManager.release(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    searcher = null;
    return results;
}
From source file:com.vmware.xenon.services.common.LuceneBlobIndexService.java
License:Open Source License
private void queryIndex(String key, Operation op) throws Throwable {
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    IndexSearcher s = updateSearcher(w);
    Query linkQuery = new TermQuery(new Term(URI_PARAM_NAME_KEY, key));
    TopDocs hits = s.search(linkQuery, 1, this.timeSort, false, false);
    if (hits.totalHits == 0) {
        op.complete();
        return;
    }
    Document hitDoc = s.doc(hits.scoreDocs[0].doc);
    BytesRef content = hitDoc.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
    long updateTime = Long.parseLong(hitDoc.get(URI_PARAM_NAME_UPDATE_TIME));
    Object hydratedInstance = Utils.fromBytes(content.bytes, content.offset, content.length);
    applyBlobRetentionPolicy(linkQuery, updateTime);
    op.setBodyNoCloning(hydratedInstance).complete();
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
/**
 * Deletes indexed document versions for the specified self link, keeping at most
 * the requested number of versions.
 *
 * @throws Throwable
 */
private void deleteDocumentsFromIndex(Operation delete, String link, long versionsToKeep) throws Throwable {
    IndexWriter wr = this.writer;
    if (wr == null) {
        delete.fail(new CancellationException());
        return;
    }

    Query linkQuery = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, link));

    IndexSearcher s = updateSearcher(link, Integer.MAX_VALUE, wr);
    if (s == null) {
        delete.fail(new CancellationException());
        return;
    }

    TopDocs results;
    results = s.search(linkQuery, Integer.MAX_VALUE, this.versionSort, false, false);
    if (results == null) {
        return;
    }

    ScoreDoc[] hits = results.scoreDocs;
    if (hits == null || hits.length == 0) {
        return;
    }

    Document hitDoc;

    if (versionsToKeep == 0) {
        // we are asked to delete everything, no need to sort or query
        wr.deleteDocuments(linkQuery);
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        delete.complete();
        return;
    }

    int versionCount = hits.length;
    hitDoc = s.doc(hits[versionCount - 1].doc);
    long versionLowerBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    hitDoc = s.doc(hits[0].doc);
    long versionUpperBound = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    // If the number of versions found are already less than the limit
    // then there is nothing to delete. Just exit.
    if (versionCount <= versionsToKeep) {
        return;
    }

    BooleanQuery.Builder builder = new BooleanQuery.Builder();

    // grab the document at the tail of the results, and use it to form a new query
    // that will delete all documents from that document up to the version at the
    // retention limit
    hitDoc = s.doc(hits[(int) versionsToKeep].doc);
    long cutOffVersion = Long.parseLong(hitDoc.get(ServiceDocument.FIELD_NAME_VERSION));

    Query versionQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_VERSION, versionLowerBound,
            cutOffVersion);

    builder.add(versionQuery, Occur.MUST);
    builder.add(linkQuery, Occur.MUST);
    BooleanQuery bq = builder.build();

    results = s.search(bq, Integer.MAX_VALUE);

    logInfo("Version grooming for %s found %d versions from %d to %d. Trimming %d versions from %d to %d",
            link, versionCount, versionLowerBound, versionUpperBound, results.scoreDocs.length,
            versionLowerBound, cutOffVersion);

    wr.deleteDocuments(bq);

    // We have observed that sometimes Lucene search does not return all the document
    // versions in the index. Normally, the number of documents returned should be
    // equal to or more than the delta between the lower and upper versions. It can be more
    // because of duplicate document versions. If that's not the case, we add the
    // link back for retention so that the next grooming run can cleanup the missed document.
    if (versionCount < versionUpperBound - versionLowerBound + 1) {
        logWarning("Adding %s back for version grooming since versionCount %d "
                + "was lower than version delta from %d to %d.", link, versionCount, versionLowerBound,
                versionUpperBound);
        synchronized (this.linkDocumentRetentionEstimates) {
            this.linkDocumentRetentionEstimates.put(link, versionsToKeep);
        }
    }

    long now = Utils.getNowMicrosUtc();

    // Use time AFTER index was updated to be sure that it can be compared
    // against the time the searcher was updated and have this change
    // be reflected in the new searcher. If the start time would be used,
    // it is possible to race with updating the searcher and NOT have this
    // change be reflected in the searcher.
    updateLinkAccessTime(now, link);

    delete.complete();
}