List of usage examples for org.apache.lucene.index.PostingsEnum#docID()
public abstract int docID();
Returns -1 if #nextDoc() or #advance(int) were not called yet, DocIdSetIterator.NO_MORE_DOCS once the iterator is exhausted, and otherwise the ID of the document the enum is currently positioned on.
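A minimal sketch of that contract before the examples. The setup is illustrative and not taken from any of the source files below: RAMDirectory, the "body" field, and the term "hello" are assumptions; the MultiFields.getTermDocsEnum call mirrors the io.anserini example further down.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class DocIdContractSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello world", Field.Store.NO));
            writer.addDocument(doc);
        }
        try (IndexReader reader = DirectoryReader.open(dir)) {
            PostingsEnum postings = MultiFields.getTermDocsEnum(reader, "body", new BytesRef("hello"));
            System.out.println(postings.docID());     // -1: nextDoc()/advance() not called yet
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                System.out.println(postings.docID()); // the ID of the current document
            }
            System.out.println(postings.docID() == DocIdSetIterator.NO_MORE_DOCS); // true: exhausted
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java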
License:Apache License
/** Checks docs + freqs + positions + payloads, sequentially. */
public void assertDocsAndPositionsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
    assertNotNull(leftDocs);
    assertNotNull(rightDocs);
    assertEquals(-1, leftDocs.docID());
    assertEquals(-1, rightDocs.docID());
    int docid;
    while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        assertEquals(docid, rightDocs.nextDoc());
        int freq = leftDocs.freq();
        assertEquals(freq, rightDocs.freq());
        for (int i = 0; i < freq; i++) {
            assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
            // we don't assert offsets/payloads, they are allowed to be different
        }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
/** Checks docs + freqs, sequentially. */
public void assertDocsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
    if (leftDocs == null) {
        assertNull(rightDocs);
        return;
    }
    assertEquals(-1, leftDocs.docID());
    assertEquals(-1, rightDocs.docID());
    int docid;
    while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        assertEquals(docid, rightDocs.nextDoc());
        // we don't assert freqs, they are allowed to be different
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
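None of the examples on this page call #advance(int), so here is a short, hedged fragment showing that docID() tracks advance() the same way it tracks nextDoc(). It reuses the illustrative reader and term from the sketch at the top of the page:

PostingsEnum postings = MultiFields.getTermDocsEnum(reader, "body", new BytesRef("hello"));
assert postings.docID() == -1;      // not positioned yet
int doc = postings.advance(0);      // jump to the first document with ID >= 0
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
    assert doc == postings.docID(); // advance() and docID() agree on the position
}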
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields with full postings information. */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        final TermsEnum te = reader.terms(field).iterator();
        BytesRef scratch;
        PostingsEnum postings = null;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            postings = te.postings(postings, PostingsEnum.ALL); // reuse the enum across terms
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < postings.freq(); i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    // (a stray third argument to format() is dropped here; the positions map is printed below)
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), postings.freq()));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(),
                            positions.keySet()));
                }
            }
        }
    }
}
From source file:edu.upenn.library.solrplugins.ProofOfConceptPayloadHandler.java
License:Apache License
private NamedList<Object> buildEntryValue(long count, PostingsEnum postings, Bits liveDocs) throws IOException {
    NamedList<Object> entry = new NamedList<>();
    entry.add("count", count);
    int i = -1;
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        if (!liveDocs.get(postings.docID())) { // skip deleted documents (liveDocs is assumed non-null here)
            continue;
        }
        i++;
        NamedList<Object> documentEntry = new NamedList<>();
        entry.add("doc" + i, documentEntry);
        for (int j = 0; j < postings.freq(); j++) {
            postings.nextPosition();
            String extra = postings.getPayload().utf8ToString();
            documentEntry.add("position" + j, extra);
        }
    }
    return entry;
}
From source file:io.anserini.index.IndexUtils.java
License:Apache License
public void printTermCounts(String termStr) throws IOException, ParseException {
    EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
    TermQuery q = (TermQuery) qp.parse(termStr);
    Term t = q.getTerm();
    System.out.println("raw term: " + termStr);
    System.out.println("stemmed term: " + q.toString(LuceneDocumentGenerator.FIELD_BODY));
    System.out.println("collection frequency: " + reader.totalTermFreq(t));
    System.out.println("document frequency: " + reader.docFreq(t));
    PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY,
            t.bytes());
    System.out.println("postings:\n");
    while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
    }
}
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
private void dumpPostings(IndexReader reader) throws IOException {
    // This is how you iterate through terms in the postings list.
    LeafReader leafReader = reader.leaves().get(0).reader();
    TermsEnum termsEnum = leafReader.terms("text").iterator();
    BytesRef bytesRef = termsEnum.next();
    while (bytesRef != null) {
        // This is the current term in the dictionary.
        String token = bytesRef.utf8ToString();
        Term term = new Term("text", token);
        System.out.print(token + " (df = " + reader.docFreq(term) + "):");
        PostingsEnum postingsEnum = leafReader.postings(term);
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq()));
        }
        System.out.println("");
        bytesRef = termsEnum.next();
    }
}
From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java
License:Open Source License
protected static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.next() == null) {
        // Ran off the end of the terms enum without finding any live docs with that field in them.
        return null;
    }
    PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    // Bits.get(docID) is true for *live* documents, so only bail out when the first doc
    // is deleted (the check is negated here; the flattened original tested liveDocs.get(...) directly,
    // which would have returned null exactly when a live doc was found).
    if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS
            || (liveDocs != null && !liveDocs.get(postingsEnum.docID()))) {
        return null;
    }
    return reader.document(postingsEnum.docID());
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndexTest.java
License:Open Source License
@Test
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.begin();
    index.addStatement(statement11);
    index.commit();

    // check that it arrived properly
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals(1, reader.numDocs());

    Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
    PostingsEnum docs = termDocs(reader, term);
    assertTrue(next(docs));

    int documentNr = docs.docID();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertFalse(next(docs));
    reader.close();

    // add another statement
    index.begin();
    index.addStatement(statement12);
    index.commit();

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates.
    reader = DirectoryReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased

    docs = termDocs(reader, term);
    assertTrue(next(docs));
    documentNr = docs.docID();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(next(docs));

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(SearchFields.TEXT_FIELD_NAME, analyzer);

    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());
    reader.close();

    // remove the first statement
    index.begin();
    index.removeStatement(statement11);
    index.commit();

    // check that that statement is actually removed and that the other still exists
    reader = DirectoryReader.open(directory);
    assertEquals(1, reader.numDocs());

    docs = termDocs(reader, term);
    assertTrue(next(docs));
    documentNr = docs.docID();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(next(docs));
    reader.close();

    // remove the other statement
    index.begin();
    index.removeStatement(statement12);
    index.commit();

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = DirectoryReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * build the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                    output.writeVInt(XAnalyzingSuggester.END_BYTE);
                    output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                    output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                }
            }
        }
    };
}
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    // TODO write index header?
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * build the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                }
            }
        }
    };
}