Example usage for org.apache.lucene.index PostingsEnum docID

List of usage examples for org.apache.lucene.index PostingsEnum docID

Introduction

On this page you can find example usages of org.apache.lucene.index.PostingsEnum#docID().

Prototype

public abstract int docID();

Document

Returns the following:
  • -1 if #nextDoc() or #advance(int) were not called yet.
  • DocIdSetIterator#NO_MORE_DOCS if the iterator has exhausted.
  • Otherwise, the doc ID the enum is currently positioned on.
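
Before the project examples below, here is a minimal sketch of this contract in use. It is not taken from any of the listed projects; `reader`, `field`, and `term` are hypothetical placeholders, and the sketch assumes the Lucene 5.x+ PostingsEnum API.

    // Minimal sketch: iterate one term's postings and observe docID() at each stage.
    // `reader`, `field`, and `term` are placeholders, not part of the examples below.
    import java.io.IOException;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BytesRef;

    public final class DocIdSketch {
        public static void printDocIds(LeafReader reader, String field, BytesRef term) throws IOException {
            Terms terms = reader.terms(field);
            if (terms == null) {
                return; // field is not indexed in this segment
            }
            TermsEnum te = terms.iterator();
            if (!te.seekExact(term)) {
                return; // term not present
            }
            PostingsEnum postings = te.postings(null, PostingsEnum.NONE);
            // Before the first nextDoc()/advance(), docID() is -1.
            assert postings.docID() == -1;
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // After a successful nextDoc(), docID() is the current document.
                System.out.println("doc=" + postings.docID());
            }
            // Once exhausted, docID() returns DocIdSetIterator.NO_MORE_DOCS.
        }
    }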

Usage

    From source file: com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

    License: Apache License

    /**
     * checks docs + freqs + positions + payloads, sequentially
     */
    public void assertDocsAndPositionsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
        assertNotNull(leftDocs);
        assertNotNull(rightDocs);
        assertEquals(-1, leftDocs.docID());
        assertEquals(-1, rightDocs.docID());
        int docid;
        while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            assertEquals(docid, rightDocs.nextDoc());
            int freq = leftDocs.freq();
            assertEquals(freq, rightDocs.freq());
            for (int i = 0; i < freq; i++) {
                assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
                // we don't assert offsets/payloads, they are allowed to be different
            }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
    }
    

    From source file: com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

    License: Apache License

    /**
     * checks docs + freqs, sequentially
     */
    public void assertDocsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
        if (leftDocs == null) {
            assertNull(rightDocs);
            return;
        }
        assertEquals(-1, leftDocs.docID());
        assertEquals(-1, rightDocs.docID());
        int docid;
        while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            assertEquals(docid, rightDocs.nextDoc());
            // we don't assert freqs, they are allowed to be different
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
    }
    

    From source file: com.shaie.utils.IndexUtils.java

    License: Apache License

    /** Prints the terms indexed under the given fields with full postings information. */
    public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
        for (final String field : fields) {
            System.out.println(format("Terms for field [%s], with positional info:", field));
            final TermsEnum te = reader.terms(field).iterator();
            BytesRef scratch;
            PostingsEnum postings = null;
            while ((scratch = te.next()) != null) {
                System.out.println(format("  %s", scratch.utf8ToString()));
                postings = te.postings(postings, PostingsEnum.ALL);
                for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                    final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                    boolean addedPayload = false;
                    for (int i = 0; i < postings.freq(); i++) {
                        final int pos = postings.nextPosition();
                        final BytesRef payload = postings.getPayload();
                        if (payload != null) {
                            positions.put(pos, BytesRef.deepCopyOf(payload));
                            addedPayload = true;
                        } else {
                            positions.put(pos, null);
                        }
                    }
                    if (addedPayload) {
                        System.out.println(
                                format("    doc=%d, freq=%d", postings.docID(), postings.freq()));
                        for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                            System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                        }
                    } else {
                        System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(),
                                positions.keySet()));
                    }
                }
            }
        }
    }
    

    From source file: edu.upenn.library.solrplugins.ProofOfConceptPayloadHandler.java

    License: Apache License

    private NamedList<Object> buildEntryValue(long count, PostingsEnum postings, Bits liveDocs) throws IOException {
        NamedList<Object> entry = new NamedList<>();
        entry.add("count", count);
        int i = -1;
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            if (!liveDocs.get(postings.docID())) {
                continue;
            }
            i++;
            NamedList<Object> documentEntry = new NamedList<>();
            entry.add("doc" + i, documentEntry);
            for (int j = 0; j < postings.freq(); j++) {
                postings.nextPosition();
                String extra = postings.getPayload().utf8ToString();
                documentEntry.add("position" + j, extra);
            }
        }
        return entry;
    }
    

    From source file: io.anserini.index.IndexUtils.java

    License: Apache License

    public void printTermCounts(String termStr) throws IOException, ParseException {
        EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
        QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
        TermQuery q = (TermQuery) qp.parse(termStr);
        Term t = q.getTerm();
    
        System.out.println("raw term:             " + termStr);
        System.out.println("stemmed term:         " + q.toString(LuceneDocumentGenerator.FIELD_BODY));
        System.out.println("collection frequency: " + reader.totalTermFreq(t));
        System.out.println("document frequency:   " + reader.docFreq(t));
    
        PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY,
                t.bytes());
        System.out.println("postings:\n");
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
        }
    }
    

    From source file: io.anserini.integration.IndexerTest.java

    License: Apache License

    private void dumpPostings(IndexReader reader) throws IOException {
        // This is how you iterate through terms in the postings list.
        LeafReader leafReader = reader.leaves().get(0).reader();
        TermsEnum termsEnum = leafReader.terms("text").iterator();
        BytesRef bytesRef = termsEnum.next();
        while (bytesRef != null) {
            // This is the current term in the dictionary.
            String token = bytesRef.utf8ToString();
            Term term = new Term("text", token);
            System.out.print(token + " (df = " + reader.docFreq(term) + "):");
    
            PostingsEnum postingsEnum = leafReader.postings(term);
            while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq()));
            }
            System.out.println("");
    
            bytesRef = termsEnum.next();
        }
    }
    

    From source file: org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

    License: Open Source License

    protected static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum.next() == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
            return null;
        }
        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
        final Bits liveDocs = reader.getLiveDocs();
        if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS
                || (liveDocs != null && liveDocs.get(postingsEnum.docID()))) {
            return null;
        }
        return reader.document(postingsEnum.docID());
    }
    

    From source file: org.eclipse.rdf4j.sail.lucene.LuceneIndexTest.java

    License: Open Source License

    @Test
    public void testAddStatement() throws IOException, ParseException {
        // add a statement to an index
        index.begin();
        index.addStatement(statement11);
        index.commit();
    
        // check that it arrived properly
        DirectoryReader reader = DirectoryReader.open(directory);
        assertEquals(1, reader.numDocs());
    
        Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
        PostingsEnum docs = termDocs(reader, term);
        assertTrue(next(docs));
    
        int documentNr = docs.docID();
        Document document = reader.document(documentNr);
        assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
        assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    
        assertFalse(next(docs));
        reader.close();
    
        // add another statement
        index.begin();
        index.addStatement(statement12);
        index.commit();
    
        // See if everything remains consistent. We must create a new IndexReader
        // in order to be able to see the updates
        reader = DirectoryReader.open(directory);
        assertEquals(1, reader.numDocs()); // #docs should *not* have increased
    
        docs = termDocs(reader, term);
        assertTrue(next(docs));
    
        documentNr = docs.docID();
        document = reader.document(documentNr);
        assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
        assertEquals(object1.getLabel(), document.get(predicate1.toString()));
        assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    
        assertFalse(next(docs));
    
        // see if we can query for these literals
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(SearchFields.TEXT_FIELD_NAME, analyzer);
    
        Query query = parser.parse(object1.getLabel());
        System.out.println("query=" + query);
        TotalHitCountCollector results = new TotalHitCountCollector();
        searcher.search(query, results);
        assertEquals(1, results.getTotalHits());
    
        query = parser.parse(object2.getLabel());
        results = new TotalHitCountCollector();
        searcher.search(query, results);
        assertEquals(1, results.getTotalHits());
    
        reader.close();
    
        // remove the first statement
        index.begin();
        index.removeStatement(statement11);
        index.commit();
    
        // check that that statement is actually removed and that the other still
        // exists
        reader = DirectoryReader.open(directory);
        assertEquals(1, reader.numDocs());
    
        docs = termDocs(reader, term);
        assertTrue(next(docs));
    
        documentNr = docs.docID();
        document = reader.document(documentNr);
        assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
        assertNull(document.get(predicate1.toString()));
        assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    
        assertFalse(next(docs));
    
        reader.close();
    
        // remove the other statement
        index.begin();
        index.removeStatement(statement12);
        index.commit();
    
        // check that there are no documents left (i.e. the last Document was
        // removed completely, rather than its remaining triple removed)
        reader = DirectoryReader.open(directory);
        assertEquals(0, reader.numDocs());
        reader.close();
    }
    

    From source file: org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

    License: Apache License

    @Override
    public FieldsConsumer consumer(final IndexOutput output) throws IOException {
        CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
        return new FieldsConsumer() {
            private Map<String, Long> fieldOffsets = new HashMap<>();
    
            @Override
            public void close() throws IOException {
                try {
                    /*
                     * write the offsets per field such that we know where
                     * we need to load the FSTs from
                     */
                    long pointer = output.getFilePointer();
                    output.writeVInt(fieldOffsets.size());
                    for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                        output.writeString(entry.getKey());
                        output.writeVLong(entry.getValue());
                    }
                    output.writeLong(pointer);
                    CodecUtil.writeFooter(output);
                } finally {
                    IOUtils.close(output);
                }
            }
    
            @Override
            public void write(Fields fields) throws IOException {
                for (String field : fields) {
                    Terms terms = fields.terms(field);
                    if (terms == null) {
                        continue;
                    }
                    TermsEnum termsEnum = terms.iterator();
                    PostingsEnum docsEnum = null;
                    final SuggestPayload spare = new SuggestPayload();
                    int maxAnalyzedPathsForOneInput = 0;
                    final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                            maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                    int docCount = 0;
                    while (true) {
                        BytesRef term = termsEnum.next();
                        if (term == null) {
                            break;
                        }
                        docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                        builder.startTerm(term);
                        int docFreq = 0;
                        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            for (int i = 0; i < docsEnum.freq(); i++) {
                                final int position = docsEnum.nextPosition();
                                AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                                builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                                // multi fields have the same surface form so we sum up here
                                maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                            }
                            docFreq++;
                            docCount = Math.max(docCount, docsEnum.docID() + 1);
                        }
                        builder.finishTerm(docFreq);
                    }
                    /*
                     * Here we are done processing the field and we can
                     * build the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                            + docCount + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(maxAnalyzedPathsForOneInput);
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                        output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                        output.writeVInt(XAnalyzingSuggester.END_BYTE);
                        output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                        output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                    }
                }
            }
        };
    }
    

    From source file: org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

    License: Apache License

    @Override
    public FieldsConsumer consumer(final IndexOutput output) throws IOException {
        // TODO write index header?
        CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
        return new FieldsConsumer() {
            private Map<String, Long> fieldOffsets = new HashMap<>();
    
            @Override
            public void close() throws IOException {
                try { /*
                       * write the offsets per field such that we know where
                       * we need to load the FSTs from
                       */
                    long pointer = output.getFilePointer();
                    output.writeVInt(fieldOffsets.size());
                    for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                        output.writeString(entry.getKey());
                        output.writeVLong(entry.getValue());
                    }
                    output.writeLong(pointer);
                } finally {
                    IOUtils.close(output);
                }
            }
    
            @Override
            public void write(Fields fields) throws IOException {
                for (String field : fields) {
                    Terms terms = fields.terms(field);
                    if (terms == null) {
                        continue;
                    }
                    TermsEnum termsEnum = terms.iterator();
                    PostingsEnum docsEnum = null;
                    final SuggestPayload spare = new SuggestPayload();
                    int maxAnalyzedPathsForOneInput = 0;
                    final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                            maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                    int docCount = 0;
                    while (true) {
                        BytesRef term = termsEnum.next();
                        if (term == null) {
                            break;
                        }
                        docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                        builder.startTerm(term);
                        int docFreq = 0;
                        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            for (int i = 0; i < docsEnum.freq(); i++) {
                                final int position = docsEnum.nextPosition();
                                AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare);
                                builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                                // multi fields have the same surface form so we sum up here
                                maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                            }
                            docFreq++;
                            docCount = Math.max(docCount, docsEnum.docID() + 1);
                        }
                        builder.finishTerm(docFreq);
                    }
                    /*
                     * Here we are done processing the field and we can
                     * build the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                            + docCount + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(maxAnalyzedPathsForOneInput);
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                    }
                }
            }
        };
    }