Example usage for org.apache.lucene.index PostingsEnum PAYLOADS

Introduction

On this page you can find example usages of the PAYLOADS constant of org.apache.lucene.index.PostingsEnum.

Prototype

short PAYLOADS

To view the source code for org.apache.lucene.index.PostingsEnum PAYLOADS, click the Source Link.

Document

Flag to pass to TermsEnum#postings(PostingsEnum,int) if you require payloads in the returned enum.

Usage

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * checks the terms enum sequentially/* w ww. j ava  2  s . c o  m*/
 * if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
 */
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep,
        boolean hasPositions) throws Exception {
    BytesRef term;
    PostingsEnum leftPositions = null;
    PostingsEnum rightPositions = null;
    PostingsEnum leftDocs = null;
    PostingsEnum rightDocs = null;

    while ((term = leftTermsEnum.next()) != null) {
        assertEquals(term, rightTermsEnum.next());
        assertTermStats(leftTermsEnum, rightTermsEnum);
        if (deep) {
            if (hasPositions) {
                // with payloads + off
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));
                // with payloads only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));

                // with offsets only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));

                // with positions only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));
            }

            // with freqs:
            assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs),
                    rightDocs = rightTermsEnum.postings(rightDocs));

            // w/o freqs:
            assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
                    rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));

            // with freqs:
            assertDocsSkipping(leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.postings(leftDocs),
                    rightDocs = rightTermsEnum.postings(rightDocs));

            // w/o freqs:
            assertDocsSkipping(leftTermsEnum.docFreq(),
                    leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
                    rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));
        }
    }
    assertNull(rightTermsEnum.next());
}

From source file:com.shaie.annots.AnnotationsUtils.java

License:Apache License

/**
 * Prints to standard output the annotations indexed for {@code term}: one line per matching
 * document with its frequency, followed by one line per position with the two VInts
 * (printed as {@code start} and {@code length}) decoded from that position's payload.
 *
 * <p>If the reader has no postings for the term, only the heading line is printed. A position
 * without a payload is reported as such instead of failing.
 *
 * @param reader the segment reader to read postings from
 * @param term   the term whose annotations are printed
 * @throws IOException if reading the postings or decoding a payload fails
 */
public static void printAnnotations(LeafReader reader, Term term) throws IOException {
    System.out.println("Annotations for " + term);
    final PostingsEnum postings = reader.postings(term, PostingsEnum.PAYLOADS);
    if (postings == null) {
        // LeafReader#postings returns null when the field or term does not exist
        return;
    }
    final ByteArrayDataInput in = new ByteArrayDataInput();
    for (int docID = postings.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = postings.nextDoc()) {
        final int freq = postings.freq();
        System.out.println("  doc=" + docID + ", freq=" + freq);
        for (int i = 0; i < freq; i++) {
            // the payload is only available after advancing to the position
            postings.nextPosition();
            final BytesRef payload = postings.getPayload();
            if (payload == null) {
                // getPayload() returns null when no payload was indexed at this position
                System.out.println("    <no payload>");
                continue;
            }
            in.reset(payload.bytes, payload.offset, payload.length);
            System.out.println("    start=" + in.readVInt() + ", length=" + in.readVInt());
        }
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.all.AllTermQuery.java

License:Apache License

/**
 * Creates the {@link Weight} for this query.
 *
 * <p>When scores are not needed the work is delegated to a plain {@link TermQuery} on the same
 * term. Otherwise an anonymous {@link Weight} is returned whose scorers read the term's postings
 * with {@link PostingsEnum#PAYLOADS} and whose explanations list the term score and the
 * payload boost.
 *
 * @param searcher    searcher supplying the reader context, statistics and similarity
 * @param needsScores whether the caller needs scores
 * @return the weight for this query
 * @throws IOException if the term states or statistics cannot be read
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (!needsScores) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }

    final TermContext perSegmentStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics fieldStatistics = searcher.collectionStatistics(term.field());
    final TermStatistics termStatistics = searcher.termStatistics(term, perSegmentStates);
    final Similarity sim = searcher.getSimilarity(needsScores);
    final SimWeight simWeight = sim.computeWeight(fieldStatistics, termStatistics);

    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return simWeight.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            simWeight.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final AllTermScorer termScorer = scorer(context);
            // no scorer for this segment, or the scorer does not land exactly on doc
            if (termScorer == null || termScorer.iterator().advance(doc) != doc) {
                return Explanation.noMatch("no matching term");
            }

            final float score = termScorer.score();
            final float freq = termScorer.freq();
            final SimScorer docScorer = sim.simScorer(simWeight, context);
            final Explanation freqPart = Explanation.match(freq, "termFreq=" + freq);
            final Explanation termScorePart = docScorer.explain(doc, freqPart);
            final Explanation payloadBoostPart = Explanation.match(termScorer.payloadBoost(),
                    "payloadBoost=" + termScorer.payloadBoost());
            final String description = "weight(" + getQuery() + " in " + doc + ") ["
                    + sim.getClass().getSimpleName() + "], product of:";
            return Explanation.match(score, description, termScorePart, payloadBoostPart);
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms fieldTerms = context.reader().terms(term.field());
            if (fieldTerms == null) {
                return null;
            }
            final TermsEnum fieldTermsEnum = fieldTerms.iterator();
            if (fieldTermsEnum == null) {
                return null;
            }
            final TermState segmentState = perSegmentStates.get(context.ord);
            if (segmentState == null) {
                // Term does not exist in this segment
                return null;
            }
            fieldTermsEnum.seekExact(term.bytes(), segmentState);
            final PostingsEnum payloadPostings = fieldTermsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert payloadPostings != null;
            return new AllTermScorer(this, payloadPostings, sim.simScorer(simWeight, context));
        }

    };
}

From source file:org.elasticsearch.common.lucene.uid.PerThreadIDAndVersionLookup.java

License:Apache License

/**
 * Looks up {@code id} in this segment's terms and returns its doc ID and version.
 *
 * <p>When {@code versions} is available, or the field has no payloads, the last live matching
 * doc is used and its version comes from {@code versions} (or {@code Versions.NOT_SET} when
 * there is none). If that path finds no live doc, or was not taken, the postings are read with
 * payloads and the first live doc carrying an 8-byte payload is returned with the version
 * decoded from it.
 *
 * @param id       the term to look up
 * @param liveDocs live-docs bits, or {@code null} to treat every doc as live
 * @param context  the segment context passed through to the result
 * @return the doc ID and version, or {@code null} if id is not found
 * @throws IOException if reading the terms or postings fails
 */
public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException {
    if (!termsEnum.seekExact(id)) {
        return null;
    }

    if (versions != null || !hasPayloads) {
        // Use NDV to retrieve the version, in which case we only need PostingsEnum:

        // there may be more than one matching docID, in the case of nested docs, so we want the last one:
        docsEnum = termsEnum.postings(docsEnum, 0);
        int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
        int candidate;
        while ((candidate = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (liveDocs == null || liveDocs.get(candidate)) {
                lastLiveDoc = candidate;
            }
        }

        if (lastLiveDoc != DocIdSetIterator.NO_MORE_DOCS) {
            if (versions == null) {
                // _uid found, but no doc values and no payloads
                return new DocIdAndVersion(lastLiveDoc, Versions.NOT_SET, context);
            }
            return new DocIdAndVersion(lastLiveDoc, versions.get(lastLiveDoc), context);
        }
    }

    // ... but used to be stored as payloads; in this case we must use PostingsEnum
    posEnum = termsEnum.postings(posEnum, PostingsEnum.PAYLOADS);
    assert posEnum != null; // terms has payloads
    int doc;
    while ((doc = posEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (liveDocs != null && !liveDocs.get(doc)) {
            continue;
        }
        posEnum.nextPosition();
        final BytesRef versionPayload = posEnum.getPayload();
        if (versionPayload != null && versionPayload.length == 8) {
            // TODO: does this break the nested docs case?  we are not returning the last matching docID here?
            return new DocIdAndVersion(doc, Numbers.bytesToLong(versionPayload), context);
        }
    }

    return null;
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Writes the codec header to {@code output} and returns a {@link FieldsConsumer} that builds one
 * FST per field from the payloads of that field's postings and appends it, followed by the
 * suggester settings, to {@code output}.
 *
 * <p>On close the consumer writes the table of per-field start offsets, the file position of
 * that table and the codec footer, then closes {@code output}.
 *
 * @param output the index output to write to; closed when the returned consumer is closed
 * @return the consumer writing to {@code output}
 * @throws IOException if writing the header fails
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        /** File position at which each written field's FST starts, keyed by field name. */
        private final Map<String, Long> offsetsByField = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                // write the offsets per field such that we know where
                // we need to load the FSTs from
                final long tableStart = output.getFilePointer();
                output.writeVInt(offsetsByField.size());
                for (Map.Entry<String, Long> fieldOffset : offsetsByField.entrySet()) {
                    output.writeString(fieldOffset.getKey());
                    output.writeVLong(fieldOffset.getValue());
                }
                output.writeLong(tableStart);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int maxDocPlusOne = 0;

                for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                    postings = termsEnum.postings(null, postings, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docsSeen = 0;
                    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < postings.freq(); i++) {
                            final int position = postings.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(postings.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // remember the largest (position + 1) seen across all postings of the field
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docsSeen++;
                        maxDocPlusOne = Math.max(maxDocPlusOne, postings.docID() + 1);
                    }
                    builder.finishTerm(docsSeen);
                }

                // Here we are done processing the field and we can
                // build the FST and write it to disk.
                final FST<Pair<Long, BytesRef>> fst = builder.build();
                assert fst != null || maxDocPlusOne == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + maxDocPlusOne + "]";

                // it's possible that the FST is null if we have 2 segments that get merged
                // and all docs that have a value in this field are deleted. This will cause
                // a consumer to be created but it doesn't consume any values causing the FSTBuilder
                // to return null.
                if (fst == null) {
                    continue;
                }

                offsetsByField.put(field, output.getFilePointer());
                fst.save(output);

                // write some more meta-info
                output.writeVInt(maxAnalyzedPathsForOneInput);
                output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                output.writeInt(maxGraphExpansions); // can be negative
                int options = 0;
                if (preserveSep) {
                    options |= SERIALIZE_PRESERVE_SEPARATORS;
                }
                if (hasPayloads) {
                    options |= SERIALIZE_HAS_PAYLOADS;
                }
                if (preservePositionIncrements) {
                    options |= SERIALIZE_PRESERVE_POSITION_INCREMENTS;
                }
                output.writeVInt(options);
                output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                output.writeVInt(XAnalyzingSuggester.END_BYTE);
                output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

/**
 * Writes the codec header to {@code output} and returns a {@link FieldsConsumer} that builds one
 * FST per field from the payloads of that field's postings and appends it, followed by the
 * suggester settings, to {@code output}.
 *
 * <p>On close the consumer writes the table of per-field start offsets and the file position of
 * that table, then closes {@code output}. No codec footer is written here.
 *
 * @param output the index output to write to; closed when the returned consumer is closed
 * @return the consumer writing to {@code output}
 * @throws IOException if writing the header fails
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    // TODO write index header?
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        /** File position at which each written field's FST starts, keyed by field name. */
        private final Map<String, Long> offsetsByField = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                // write the offsets per field such that we know where
                // we need to load the FSTs from
                final long tableStart = output.getFilePointer();
                output.writeVInt(offsetsByField.size());
                for (Map.Entry<String, Long> fieldOffset : offsetsByField.entrySet()) {
                    output.writeString(fieldOffset.getKey());
                    output.writeVLong(fieldOffset.getValue());
                }
                output.writeLong(tableStart);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int maxDocPlusOne = 0;

                for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                    postings = termsEnum.postings(null, postings, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docsSeen = 0;
                    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < postings.freq(); i++) {
                            final int position = postings.nextPosition();
                            AnalyzingCompletionLookupProviderV1.this.parsePayload(postings.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // remember the largest (position + 1) seen across all postings of the field
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docsSeen++;
                        maxDocPlusOne = Math.max(maxDocPlusOne, postings.docID() + 1);
                    }
                    builder.finishTerm(docsSeen);
                }

                // Here we are done processing the field and we can
                // build the FST and write it to disk.
                final FST<Pair<Long, BytesRef>> fst = builder.build();
                assert fst != null || maxDocPlusOne == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + maxDocPlusOne + "]";

                // it's possible that the FST is null if we have 2 segments that get merged
                // and all docs that have a value in this field are deleted. This will cause
                // a consumer to be created but it doesn't consume any values causing the FSTBuilder
                // to return null.
                if (fst == null) {
                    continue;
                }

                offsetsByField.put(field, output.getFilePointer());
                fst.save(output);

                // write some more meta-info
                output.writeVInt(maxAnalyzedPathsForOneInput);
                output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                output.writeInt(maxGraphExpansions); // can be negative
                int options = 0;
                if (preserveSep) {
                    options |= SERIALIZE_PRESERVE_SEPARATORS;
                }
                if (hasPayloads) {
                    options |= SERIALIZE_HAS_PAYLOADS;
                }
                if (preservePositionIncrements) {
                    options |= SERIALIZE_PRESERVE_POSITION_INCREMENTS;
                }
                output.writeVInt(options);
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Writes the codec header to {@code output} and returns a {@link FieldsConsumer} that builds one
 * FST per field from the payloads of that field's postings and appends it, followed by the
 * suggester settings, to {@code output}.
 *
 * <p>On close the consumer writes the table of per-field start offsets, the file position of
 * that table and the codec footer, then closes {@code output}.
 *
 * @param output the index output to write to; closed when the returned consumer is closed
 * @return the consumer writing to {@code output}
 * @throws IOException if writing the header fails
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        /** File position at which each written field's FST starts, keyed by field name. */
        private final Map<String, Long> offsetsByField = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                // write the offsets per field such that we know where
                // we need to load the FSTs from
                final long tableStart = output.getFilePointer();
                output.writeVInt(offsetsByField.size());
                for (Map.Entry<String, Long> fieldOffset : offsetsByField.entrySet()) {
                    output.writeString(fieldOffset.getKey());
                    output.writeVLong(fieldOffset.getValue());
                }
                output.writeLong(tableStart);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int maxDocPlusOne = 0;

                for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                    postings = termsEnum.postings(postings, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docsSeen = 0;
                    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < postings.freq(); i++) {
                            final int position = postings.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(postings.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // remember the largest (position + 1) seen across all postings of the field
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docsSeen++;
                        maxDocPlusOne = Math.max(maxDocPlusOne, postings.docID() + 1);
                    }
                    builder.finishTerm(docsSeen);
                }

                // Here we are done processing the field and we can
                // build the FST and write it to disk.
                final FST<Pair<Long, BytesRef>> fst = builder.build();
                assert fst != null || maxDocPlusOne == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + maxDocPlusOne + "]";

                // it's possible that the FST is null if we have 2 segments that get merged
                // and all docs that have a value in this field are deleted. This will cause
                // a consumer to be created but it doesn't consume any values causing the FSTBuilder
                // to return null.
                if (fst == null) {
                    continue;
                }

                offsetsByField.put(field, output.getFilePointer());
                fst.save(output);

                // write some more meta-info
                output.writeVInt(maxAnalyzedPathsForOneInput);
                output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                output.writeInt(maxGraphExpansions); // can be negative
                int options = 0;
                if (preserveSep) {
                    options |= SERIALIZE_PRESERVE_SEPARATORS;
                }
                if (hasPayloads) {
                    options |= SERIALIZE_HAS_PAYLOADS;
                }
                if (preservePositionIncrements) {
                    options |= SERIALIZE_PRESERVE_POSITION_INCREMENTS;
                }
                output.writeVInt(options);
                output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                output.writeVInt(XAnalyzingSuggester.END_BYTE);
                output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
            }
        }
    };
}