List of usage examples for org.apache.lucene.index LeafReader postings
public final PostingsEnum postings(Term term) throws IOException
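A minimal sketch of the call pattern shared by the examples below (field and term names here are illustrative). Note that postings(Term) returns null if the field or term does not exist, and it requests document frequencies only (PostingsEnum.FREQS); positions and payloads must be requested through the postings(Term, int) overload, as the first two examples below do.

LeafReader leafReader = reader.leaves().get(0).reader(); // 'reader' opened elsewhere
PostingsEnum postings = leafReader.postings(new Term("body", "lucene")); // hypothetical field/term
if (postings != null) { // null when the field or term is absent
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println("doc=" + postings.docID() + ", freq=" + postings.freq());
    }
}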
From source file:com.shaie.annots.AnnotationSearchExample.java
License:Apache License
public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(dir, conf);

    // We need to add the annotation as a TokenStream field, therefore we cannot use an
    // Analyzer passed in the IndexWriterConfig.
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("quick brown fox ate the blue red chicken"));
    TeeSinkTokenFilter textStream = new TeeSinkTokenFilter(tokenizer);
    TokenStream colorAnnotationStream = new AnnotatingTokenFilter(
            textStream.newSinkTokenStream(new ColorsSinkFilter()), COLOR_ANNOT_TERM);

    Document doc = new Document();
    doc.add(new TextField("text", textStream));
    doc.add(new TextField("annot", colorAnnotationStream));
    writer.addDocument(doc);
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader ar = reader.leaves().get(0).reader(); // we only have one segment
    printFieldTerms(ar, "text"); // helper defined elsewhere in the source file
    System.out.println();

    final ByteArrayDataInput in = new ByteArrayDataInput();
    // The one-argument postings(Term) requests frequencies only; payloads (and positions)
    // must be requested explicitly, otherwise getPayload() returns null.
    PostingsEnum dape = ar.postings(new Term("annot", COLOR_ANNOT_TERM), PostingsEnum.PAYLOADS);
    int docID = dape.nextDoc();
    int freq = dape.freq();
    System.out.println("Color annotation spans: doc=" + docID + ", freq=" + freq);
    for (int i = 0; i < freq; i++) {
        dape.nextPosition();
        BytesRef payload = dape.getPayload();
        in.reset(payload.bytes, payload.offset, payload.length);
        System.out.println(" start=" + in.readVInt() + ", length=" + in.readVInt());
    }

    IndexSearcher searcher = new IndexSearcher(reader);
    System.out.println("\nsearching for 'red WITHIN color':");
    Query q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
            new SpanInclusivePositionTermQuery(new Term("text", "red")));
    TopDocs td = searcher.search(q, 10);
    System.out.println(" num results: " + td.scoreDocs.length);

    System.out.println("\nsearching for 'ate WITHIN color':");
    q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
            new SpanInclusivePositionTermQuery(new Term("text", "ate")));
    td = searcher.search(q, 10);
    System.out.println(" num results: " + td.scoreDocs.length);

    reader.close();
    dir.close();
}
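The payloads decoded above with ByteArrayDataInput are attached at indexing time by the annotation filter. As a minimal sketch of the writing side (an illustrative filter, not the AnnotatingTokenFilter from the source file), a TokenFilter can encode a start/length pair as two VInts per token:

// Illustrative only: stores a hypothetical (start, length) pair as two VInts in each
// token's payload, mirroring the ByteArrayDataInput decoding in the example above.
final class SpanPayloadFilter extends TokenFilter {
    private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
    private int start = 0;

    SpanPayloadFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        final byte[] buf = new byte[10]; // two VInts fit comfortably
        final ByteArrayDataOutput out = new ByteArrayDataOutput(buf);
        out.writeVInt(start++); // hypothetical span start
        out.writeVInt(1);       // hypothetical span length
        payloadAtt.setPayload(new BytesRef(buf, 0, out.getPosition()));
        return true;
    }
}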
From source file:com.shaie.PhraseVsSpanQuery.java
License:Apache License
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Directory dir = new RAMDirectory();
    final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
    final IndexWriter writer = new IndexWriter(dir, conf);

    final Document doc = new Document();
    // A custom TokenStream that emits 'b' at the same position as 'a' (position increment 0).
    doc.add(new TextField("f", new TokenStream() {
        final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class);
        final CharTermAttribute term = addAttribute(CharTermAttribute.class);
        boolean first = true, done = false;

        @Override
        public boolean incrementToken() throws IOException {
            if (done) {
                return false;
            }
            if (first) {
                term.setEmpty().append("a");
                pos.setPositionIncrement(1);
                first = false;
            } else {
                term.setEmpty().append("b");
                pos.setPositionIncrement(0);
                done = true;
            }
            return true;
        }
    }));
    writer.addDocument(doc);
    writer.close();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final LeafReader ar = reader.leaves().get(0).reader();
    final TermsEnum te = ar.terms("f").iterator();
    BytesRef scratch;
    while ((scratch = te.next()) != null) {
        System.out.println(scratch.utf8ToString());
        // Request positions explicitly; the one-argument postings(Term) returns frequencies only.
        final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString()), PostingsEnum.POSITIONS);
        System.out.println(" doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition());
    }
    System.out.println();

    // Try phrase queries with increasing slop.
    final PhraseQuery pqNoSlop = buildPhraseQuery(0);
    System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits);

    final PhraseQuery pqSlop1 = buildPhraseQuery(1);
    System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits);

    final PhraseQuery pqSlop3 = buildPhraseQuery(3);
    System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits);

    final SpanNearQuery snqUnOrdered = new SpanNearQuery(
            new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) },
            1, false);
    System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = "
            + searcher.search(snqUnOrdered, 10).totalHits);

    final SpanNearQuery snqOrdered = new SpanNearQuery(
            new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) },
            1, true);
    System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = "
            + searcher.search(snqOrdered, 10).totalHits);

    reader.close();
}
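buildPhraseQuery is a helper defined elsewhere in the source file. A plausible reconstruction, assuming a Lucene version with PhraseQuery.Builder (5.3+) and that the helper simply builds an "a b" phrase on field "f" with the given slop:

// Hypothetical reconstruction; the real helper in the source file may differ.
private static PhraseQuery buildPhraseQuery(int slop) {
    final PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);
    builder.add(new Term("f", "a"));
    builder.add(new Term("f", "b"));
    return builder.build();
}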
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
private void dumpPostings(IndexReader reader) throws IOException {
    // This is how you iterate through terms in the postings list.
    LeafReader leafReader = reader.leaves().get(0).reader();
    TermsEnum termsEnum = leafReader.terms("text").iterator();
    BytesRef bytesRef = termsEnum.next();
    while (bytesRef != null) {
        // This is the current term in the dictionary.
        String token = bytesRef.utf8ToString();
        Term term = new Term("text", token);
        System.out.print(token + " (df = " + reader.docFreq(term) + "):");
        PostingsEnum postingsEnum = leafReader.postings(term);
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq()));
        }
        System.out.println("");
        bytesRef = termsEnum.next();
    }
}
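Because dumpPostings reads only docID() and freq(), the one-argument postings(term) call is sufficient here. To also dump positions, the flags overload is needed; a sketch of the inner loop extended accordingly:

// Sketch: the same loop, additionally printing positions (requires the flags overload).
PostingsEnum postingsEnum = leafReader.postings(term, PostingsEnum.POSITIONS);
while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    System.out.print(" (" + postingsEnum.docID() + ", " + postingsEnum.freq() + ":");
    for (int i = 0; i < postingsEnum.freq(); i++) {
        System.out.print(" " + postingsEnum.nextPosition());
    }
    System.out.print(")");
}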
From source file:org.hibernate.search.spatial.impl.SpatialHashQuery.java
License:LGPL
/**
 * Searches the index for documents having the correct spatial hash cell id at the given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @return a {@link DocIdSetIterator} with the matching document ids, or {@code null} if there
 *         are no spatial hash cell ids to match
 */
private DocIdSetIterator createDocIdSetIterator(LeafReaderContext context) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }
    final LeafReader atomicReader = context.reader();
    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        PostingsEnum spatialHashCellsDocs = atomicReader.postings(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }
                matchedDocumentsIds.bits().set(docId);
                found = true;
            }
        }
    }
    return found ? matchedDocumentsIds.iterator() : DocIdSetIterator.empty();
}
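Each LeafReader.postings(Term) call above re-seeks the term dictionary from scratch. A sketch of an alternative inner loop (assuming spatialHashCellsIds is a List&lt;String&gt;) that reuses a single TermsEnum and PostingsEnum across the cell ids:

// Sketch only: reuse one TermsEnum/PostingsEnum instead of one postings(Term) call per cell.
Terms terms = atomicReader.terms(fieldName);
if (terms != null) {
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    for (String cellId : spatialHashCellsIds) {
        if (termsEnum.seekExact(new BytesRef(cellId))) {
            docs = termsEnum.postings(docs, PostingsEnum.NONE); // doc ids only, no freqs needed
            for (int docId = docs.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docs.nextDoc()) {
                matchedDocumentsIds.bits().set(docId);
                found = true;
            }
        }
    }
}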