List of usage examples for org.apache.lucene.index LeafReader terms
public abstract Terms terms(String field) throws IOException;
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private float getScore(Directory directory, String term) throws CorruptIndexException, IOException { try (IndexReader idxReader = DirectoryReader.open(directory)) { ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity(); for (LeafReaderContext context : idxReader.leaves()) { LeafReader reader = context.reader(); try { Terms terms = reader.terms(Constants.DOC_CONTENT); TermsEnum termsEnum = terms.iterator(); PostingsEnum postings = null; BytesRef text;//from www. j a v a 2 s . c o m while ((text = termsEnum.next()) != null) { postings = termsEnum.postings(postings); if (text.utf8ToString().equalsIgnoreCase(term)) { while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) { int freq = postings.freq(); float tf = sim.tf(freq); float idf = sim.idf(termsEnum.docFreq(), indexReader.numDocs()); return tf * idf; } } } } catch (IOException ex) { Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex); } } } return 0; }
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
private void verify(Directory dir) throws Exception { DirectoryReader ir = DirectoryReader.open(dir); for (LeafReaderContext leaf : ir.leaves()) { LeafReader leafReader = leaf.reader(); assertTerms(leafReader.terms("field1docs"), leafReader.terms("field2freqs"), true); assertTerms(leafReader.terms("field3positions"), leafReader.terms("field4offsets"), true); assertTerms(leafReader.terms("field4offsets"), leafReader.terms("field5payloadsFixed"), true); assertTerms(leafReader.terms("field5payloadsFixed"), leafReader.terms("field6payloadsVariable"), true); assertTerms(leafReader.terms("field6payloadsVariable"), leafReader.terms("field7payloadsFixedOffsets"), true);/*w w w .ja v a 2 s . co m*/ assertTerms(leafReader.terms("field7payloadsFixedOffsets"), leafReader.terms("field8payloadsVariableOffsets"), true); } ir.close(); }
From source file:com.shaie.annots.AnnotationSearchExample.java
License:Apache License
/** Prints the terms indexed under the given field. */ static void printFieldTerms(LeafReader reader, String field) throws IOException { System.out.println("Terms for field: " + field); TermsEnum te = reader.terms(field).iterator(null); BytesRef scratch;//from w w w . j a va 2 s.co m while ((scratch = te.next()) != null) { System.out.println(" " + scratch.utf8ToString()); } }
From source file:com.shaie.PhraseVsSpanQuery.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer()); final IndexWriter writer = new IndexWriter(dir, conf); final Document doc = new Document(); doc.add(new TextField("f", new TokenStream() { final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class); final CharTermAttribute term = addAttribute(CharTermAttribute.class); boolean first = true, done = false; @Override/*from w w w.j a v a 2 s. com*/ public boolean incrementToken() throws IOException { if (done) { return false; } if (first) { term.setEmpty().append("a"); pos.setPositionIncrement(1); first = false; } else { term.setEmpty().append("b"); pos.setPositionIncrement(0); done = true; } return true; } })); writer.addDocument(doc); writer.close(); final DirectoryReader reader = DirectoryReader.open(dir); final IndexSearcher searcher = new IndexSearcher(reader); final LeafReader ar = reader.leaves().get(0).reader(); final TermsEnum te = ar.terms("f").iterator(); BytesRef scratch = new BytesRef(); while ((scratch = te.next()) != null) { System.out.println(scratch.utf8ToString()); final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString())); System.out.println(" doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition()); } System.out.println(); // try a phrase query with a slop final PhraseQuery pqNoSlop = buildPhraseQuery(0); System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits); final PhraseQuery pqSlop1 = buildPhraseQuery(1); System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits); final PhraseQuery pqSlop3 = buildPhraseQuery(3); System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits); final SpanNearQuery snqUnOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1, false); System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = " + searcher.search(snqUnOrdered, 10).totalHits); final SpanNearQuery snqOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1, true); System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = " + searcher.search(snqOrdered, 10).totalHits); reader.close(); }
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields. */ public static void printFieldTerms(LeafReader reader, String... fields) throws IOException { for (final String field : fields) { System.out.println(format("Terms for field [%s]:", field)); final TermsEnum te = reader.terms(field).iterator(); BytesRef scratch;/* w ww.j a va 2 s . com*/ while ((scratch = te.next()) != null) { System.out.println(format(" %s", scratch.utf8ToString())); } } }
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields with full postings information. */ public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException { for (final String field : fields) { System.out.println(format("Terms for field [%s], with positional info:", field)); final TermsEnum te = reader.terms(field).iterator(); BytesRef scratch;//from ww w . j av a2 s.c o m PostingsEnum postings = null; while ((scratch = te.next()) != null) { System.out.println(format(" %s", scratch.utf8ToString())); postings = te.postings(postings, PostingsEnum.ALL); for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) { final Map<Integer, BytesRef> positions = Maps.newTreeMap(); boolean addedPayload = false; for (int i = 0; i < postings.freq(); i++) { final int pos = postings.nextPosition(); final BytesRef payload = postings.getPayload(); if (payload != null) { positions.put(pos, BytesRef.deepCopyOf(payload)); addedPayload = true; } else { positions.put(pos, null); } } if (addedPayload) { System.out.println( format(" doc=%d, freq=%d", postings.docID(), postings.freq(), positions)); for (final Entry<Integer, BytesRef> e : positions.entrySet()) { System.out.println(format(" pos=%d, payload=%s", e.getKey(), e.getValue())); } } else { System.out.println(format(" doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(), positions.keySet())); } } } } }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS") @Override/*from w w w .ja v a2 s . co m*/ public long getTermFrequency(@NotNull final BytesRef term) { // try get a cached value first @Nullable Long tf = this.cache_tf.get(term); if (tf == null) { tf = 0L; for (final LeafReaderContext lrc : this.index.reader.leaves()) { final LeafReader r = lrc.reader(); long fieldTf = 0L; if (r.numDocs() > 0) { try { for (final String s : r.fields()) { @Nullable final Terms terms = r.terms(s); if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(term)) { fieldTf += termsEnum.totalTermFreq(); } } } } catch (final IOException e) { throw new UncheckedIOException(e); } } tf += fieldTf; } this.cache_tf.put(BytesRef.deepCopyOf(term), tf); } return tf; }
From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java
License:Open Source License
@Override public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs) throws IOException { FixedBitSet checkBits;//from w ww .ja va2s . c om final LeafReader reader = context.reader(); final int maxDoc = reader.maxDoc(); BitSet finalBits = new SparseFixedBitSet(maxDoc); if (acceptDocs == null) { checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs()); if (checkBits == null) { // all live checkBits = new FixedBitSet(maxDoc); checkBits.set(0, checkBits.length()); } } else { checkBits = BitsUtils.bits2FixedBitSet(acceptDocs); } @Nullable final Terms terms = reader.terms(this.field); if (terms != null) { final int termsDocCount = terms.getDocCount(); if (termsDocCount != 0) { if (termsDocCount == maxDoc) { // all matching finalBits = checkBits; } else { @Nullable final Terms t = reader.terms(this.field); if (t != null) { PostingsEnum pe = null; final TermsEnum te = t.iterator(null); int docId; while (te.next() != null) { pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE); while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (checkBits.getAndClear(docId)) { finalBits.set(docId); } } } } } } } return new BitDocIdSet(finalBits); }
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
private void dumpPostings(IndexReader reader) throws IOException { // This is how you iterate through terms in the postings list. LeafReader leafReader = reader.leaves().get(0).reader(); TermsEnum termsEnum = leafReader.terms("text").iterator(); BytesRef bytesRef = termsEnum.next(); while (bytesRef != null) { // This is the current term in the dictionary. String token = bytesRef.utf8ToString(); Term term = new Term("text", token); System.out.print(token + " (df = " + reader.docFreq(term) + "):"); PostingsEnum postingsEnum = leafReader.postings(term); while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq())); }/*from w ww . j a v a 2 s .c o m*/ System.out.println(""); bytesRef = termsEnum.next(); } }
From source file:org.apache.solr.schema.TestSortableTextField.java
License:Apache License
public void testWhiteboxIndexReader() throws Exception { assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx", "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!")); assertU(commit());//from w w w . j a v a 2s . co m final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false); try { final LeafReader r = searcher.get().getSlowAtomicReader(); // common cases... for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) { assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field)); assertEquals("DocValuesType: " + field, DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType()); assertNotNull("DocValues: " + field, r.getSortedDocValues(field)); assertNotNull("Terms: " + field, r.terms(field)); } // special cases... assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt")); assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType()); assertNull(r.getSortedDocValues("whitespace_nodv_stxt")); assertNotNull(r.terms("whitespace_nodv_stxt")); // assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt")); assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType()); assertNotNull(r.getSortedDocValues("whitespace_nois_stxt")); assertNull(r.terms("whitespace_nois_stxt")); // assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt")); assertEquals(DocValuesType.SORTED_SET, r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType()); assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt")); assertNotNull(r.terms("whitespace_m_stxt")); } finally { if (null != searcher) { searcher.decref(); } } }