Example usage for org.apache.lucene.index LeafReader terms

List of usage examples for org.apache.lucene.index LeafReader terms

Introduction

On this page you can find example usages of org.apache.lucene.index LeafReader.terms.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Returns the Terms index for this field, or null if it has none.

Usage

From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java

License:Open Source License

/**
 * Computes a TF-IDF score for {@code term} over the content field of the index in
 * {@code directory}. The score is taken from the first document (in the first segment)
 * that contains the term; returns 0 if the term does not occur anywhere.
 *
 * @param directory index directory to open
 * @param term term to score (matched case-insensitively against indexed terms)
 * @return {@code tf * idf} for the first matching document, or 0 if not found
 * @throws IOException if the index cannot be opened
 */
private float getScore(Directory directory, String term) throws CorruptIndexException, IOException {

    try (IndexReader idxReader = DirectoryReader.open(directory)) {

        ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();

        for (LeafReaderContext context : idxReader.leaves()) {
            LeafReader reader = context.reader();

            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                if (terms == null) {
                    // LeafReader.terms() returns null when the field has no terms
                    // index in this segment — skip instead of throwing an NPE.
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;

                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    postings = termsEnum.postings(postings);
                    if (text.utf8ToString().equalsIgnoreCase(term)) {

                        while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                            int freq = postings.freq();
                            float tf = sim.tf(freq);
                            // BUG FIX: was 'indexReader.numDocs()' — no such variable
                            // exists in this method; the reader opened above is 'idxReader'.
                            float idf = sim.idf(termsEnum.docFreq(), idxReader.numDocs());
                            return tf * idf;
                        }
                    }
                }

            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

    }

    return 0;
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/** Checks that each listed pair of fields exposes matching term dictionaries in every leaf. */
private void verify(Directory dir) throws Exception {
    // Field pairs whose Terms must agree; comparison order mirrors the indexing setup.
    final String[][] fieldPairs = {
            { "field1docs", "field2freqs" },
            { "field3positions", "field4offsets" },
            { "field4offsets", "field5payloadsFixed" },
            { "field5payloadsFixed", "field6payloadsVariable" },
            { "field6payloadsVariable", "field7payloadsFixedOffsets" },
            { "field7payloadsFixedOffsets", "field8payloadsVariableOffsets" } };
    DirectoryReader ir = DirectoryReader.open(dir);
    for (LeafReaderContext leaf : ir.leaves()) {
        LeafReader leafReader = leaf.reader();
        for (String[] pair : fieldPairs) {
            assertTerms(leafReader.terms(pair[0]), leafReader.terms(pair[1]), true);
        }
    }
    ir.close();
}

From source file:com.shaie.annots.AnnotationSearchExample.java

License:Apache License

/**
 * Prints the terms indexed under the given field, one per line.
 *
 * @param reader leaf reader to inspect
 * @param field field whose term dictionary is printed
 * @throws IOException if the terms enum cannot be read
 */
static void printFieldTerms(LeafReader reader, String field) throws IOException {
    System.out.println("Terms for field: " + field);
    final Terms terms = reader.terms(field);
    if (terms == null) {
        // LeafReader.terms() returns null when the field has no terms index;
        // the original dereferenced it unconditionally and would NPE.
        return;
    }
    TermsEnum te = terms.iterator(null);
    BytesRef scratch;
    while ((scratch = te.next()) != null) {
        System.out.println("  " + scratch.utf8ToString());
    }
}

From source file:com.shaie.PhraseVsSpanQuery.java

License:Apache License

@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    // Demo: index one document whose field "f" holds two tokens, "a" and "b",
    // stacked on the SAME position ("b" gets a position increment of 0), then
    // compare how PhraseQuery and SpanNearQuery treat those stacked tokens.
    final Directory dir = new RAMDirectory();
    final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
    final IndexWriter writer = new IndexWriter(dir, conf);

    final Document doc = new Document();
    // Hand-rolled TokenStream emitting exactly two tokens.
    doc.add(new TextField("f", new TokenStream() {
        final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class);
        final CharTermAttribute term = addAttribute(CharTermAttribute.class);
        boolean first = true, done = false;

        @Override
        public boolean incrementToken() throws IOException {
            if (done) {
                return false;
            }
            if (first) {
                // First token: "a", advancing to position 0.
                term.setEmpty().append("a");
                pos.setPositionIncrement(1);
                first = false;
            } else {
                // Second token: "b", increment 0 => same position as "a".
                term.setEmpty().append("b");
                pos.setPositionIncrement(0);
                done = true;
            }
            return true;
        }
    }));
    writer.addDocument(doc);
    writer.close();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(reader);
    // Dump each term of "f" with its first doc and position to show the stacking.
    final LeafReader ar = reader.leaves().get(0).reader();
    final TermsEnum te = ar.terms("f").iterator();
    BytesRef scratch = new BytesRef();
    while ((scratch = te.next()) != null) {
        System.out.println(scratch.utf8ToString());
        final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString()));
        System.out.println("  doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition());
    }

    System.out.println();

    // try a phrase query with a slop
    final PhraseQuery pqNoSlop = buildPhraseQuery(0);
    System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits);

    final PhraseQuery pqSlop1 = buildPhraseQuery(1);
    System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits);

    final PhraseQuery pqSlop3 = buildPhraseQuery(3);
    System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits);

    // Span queries over the same stacked tokens: unordered vs ordered matching.
    final SpanNearQuery snqUnOrdered = new SpanNearQuery(
            new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
            false);
    System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = "
            + searcher.search(snqUnOrdered, 10).totalHits);

    final SpanNearQuery snqOrdered = new SpanNearQuery(
            new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
            true);
    System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = "
            + searcher.search(snqOrdered, 10).totalHits);

    reader.close();
}

From source file:com.shaie.utils.IndexUtils.java

License:Apache License

/**
 * Prints the terms indexed under each of the given fields, one term per line.
 *
 * @param reader leaf reader to inspect
 * @param fields fields whose term dictionaries are printed
 * @throws IOException if a terms enum cannot be read
 */
public static void printFieldTerms(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s]:", field));
        final Terms terms = reader.terms(field);
        if (terms == null) {
            // LeafReader.terms() returns null when the field has no terms index;
            // the original dereferenced it unconditionally and would NPE.
            continue;
        }
        final TermsEnum te = terms.iterator();
        BytesRef scratch;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
        }
    }
}

From source file:com.shaie.utils.IndexUtils.java

License:Apache License

/**
 * Prints the terms indexed under the given fields with full postings information:
 * for each term, every document, its frequency, positions, and payloads (if any).
 *
 * @param reader leaf reader to inspect
 * @param fields fields whose postings are dumped
 * @throws IOException if a terms or postings enum cannot be read
 */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        final Terms fieldTerms = reader.terms(field);
        if (fieldTerms == null) {
            // LeafReader.terms() returns null when the field has no terms index.
            continue;
        }
        final TermsEnum te = fieldTerms.iterator();
        BytesRef scratch;
        PostingsEnum postings = null;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            postings = te.postings(postings, PostingsEnum.ALL);
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                // Collect position -> payload for this document (TreeMap keeps positions sorted).
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < postings.freq(); i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        // Deep-copy: the enum may reuse the returned BytesRef.
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    // BUG FIX: the original passed a stray 'positions' argument to
                    // format() with no matching specifier; it was silently ignored.
                    // Positions/payloads are printed by the loop below.
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), postings.freq()));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(),
                            positions.keySet()));
                }
            }
        }
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License

@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
public long getTermFrequency(@NotNull final BytesRef term) {
    // Try a cached value first to avoid re-scanning every segment on repeat lookups.
    @Nullable
    Long tf = this.cache_tf.get(term);
    if (tf == null) {
        tf = 0L;
        // Sum the term's total frequency across every field of every index segment.
        for (final LeafReaderContext lrc : this.index.reader.leaves()) {
            final LeafReader r = lrc.reader();
            long fieldTf = 0L;
            if (r.numDocs() > 0) {
                try {
                    for (final String s : r.fields()) {
                        @Nullable
                        final Terms terms = r.terms(s);
                        // terms is null when field 's' has no terms index in this segment.
                        if (terms != null) {
                            final TermsEnum termsEnum = terms.iterator(null);
                            // seekExact is true only if the exact term exists in this field.
                            if (termsEnum.seekExact(term)) {
                                fieldTf += termsEnum.totalTermFreq();
                            }
                        }
                    }
                } catch (final IOException e) {
                    // Checked IOException softened to unchecked, per the FB suppression above.
                    throw new UncheckedIOException(e);
                }
            }
            tf += fieldTf;
        }
        // Deep-copy the key: the caller's BytesRef may be mutated after this returns.
        this.cache_tf.put(BytesRef.deepCopyOf(term), tf);
    }

    return tf;
}

From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License:Open Source License

/**
 * Builds the set of accepted documents that actually contain at least one term
 * in {@code this.field}, i.e. filters out documents where the field is empty.
 *
 * @param context leaf (segment) context to evaluate
 * @param acceptDocs documents allowed by previous filters, or null for all live docs
 * @return a {@link BitDocIdSet} of accepted docs that have terms in the field
 * @throws IOException if postings cannot be read
 */
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    BitSet finalBits = new SparseFixedBitSet(maxDoc);
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // getLiveDocs() returned null: no deletions, so every doc is live.
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();

        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // Every document has the field, so every candidate doc matches.
                finalBits = checkBits;
            } else {
                // FIX: reuse 'terms' instead of the original's redundant second
                // reader.terms(this.field) lookup (it returned the same Terms).
                PostingsEnum pe = null;
                final TermsEnum te = terms.iterator(null);
                int docId;
                while (te.next() != null) {
                    pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                    while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        // getAndClear ensures each doc is transferred at most once
                        // even when it appears under multiple terms.
                        if (checkBits.getAndClear(docId)) {
                            finalBits.set(docId);
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file:io.anserini.integration.IndexerTest.java

License:Apache License

/** Walks every term of the "text" field in the first leaf and prints its (docID, freq) postings. */
private void dumpPostings(IndexReader reader) throws IOException {
    // This is how you iterate through terms in the postings list.
    LeafReader leafReader = reader.leaves().get(0).reader();
    TermsEnum termsEnum = leafReader.terms("text").iterator();
    for (BytesRef bytesRef = termsEnum.next(); bytesRef != null; bytesRef = termsEnum.next()) {
        // This is the current term in the dictionary.
        String token = bytesRef.utf8ToString();
        Term term = new Term("text", token);
        System.out.print(token + " (df = " + reader.docFreq(term) + "):");

        PostingsEnum postingsEnum = leafReader.postings(term);
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq()));
        }
        System.out.println("");
    }
}

From source file:org.apache.solr.schema.TestSortableTextField.java

License:Apache License

// White-box check of what the sortable-text field variants actually write to the
// index: which fields get SORTED/SORTED_SET doc values and which get a terms index.
public void testWhiteboxIndexReader() throws Exception {
    // One document covering single-valued, multi-valued (_m_), and keyword variants.
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());

    final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader r = searcher.get().getSlowAtomicReader();

        // common cases...
        // Default variants: SORTED doc values AND an indexed terms dictionary.
        for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt")) {
            assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
            assertEquals("DocValuesType: " + field, DocValuesType.SORTED,
                    r.getFieldInfos().fieldInfo(field).getDocValuesType());
            assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
            assertNotNull("Terms: " + field, r.terms(field));

        }

        // special cases...
        // "nodv": indexed terms but no doc values.
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE,
                r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(r.terms("whitespace_nodv_stxt"));
        // "nois": SORTED doc values but no terms index (not indexed/searchable).
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED,
                r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(r.terms("whitespace_nois_stxt"));
        // Multi-valued variant: SORTED_SET doc values plus a terms index.
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET,
                r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(r.terms("whitespace_m_stxt"));

    } finally {
        if (null != searcher) {
            searcher.decref();
        }
    }
}