Example usage for org.apache.lucene.index TermsEnum seekExact

List of usage examples for org.apache.lucene.index TermsEnum seekExact

Introduction

On this page you can find example usages of org.apache.lucene.index TermsEnum.seekExact.

Prototype

public abstract void seekExact(BytesRef term, TermState state) throws IOException;

Source Link

Document

Expert: Seeks to a specific position using a TermState previously obtained from #termState().

Usage

From source file:com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java

License:Apache License

/**
 * Builds the segment-level {@link Scorer} for this flexible query.
 * Returns null when any field in the term matrix has no terms.
 */
@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FlexibleQuery.FlexibleTerm[][] termMatrix = query.getTerms();
    TermDocsEnum[][] matchedEnumsMatrix = new TermDocsEnum[termMatrix.length][];
    for (int field = 0; field < termMatrix.length; ++field) {
        FlexibleQuery.FlexibleTerm[] fieldTerms = termMatrix[field];
        matchedEnumsMatrix[field] = new TermDocsEnum[fieldTerms.length];
        if (fieldTerms.length == 0) {
            return null;
        }

        Terms terms = context.reader().terms(fieldTerms[0].term.field());
        if (terms == null) {
            // Field absent from this segment: record an empty enum per term.
            for (int t = 0; t < fieldTerms.length; ++t) {
                matchedEnumsMatrix[field][t] = new TermDocsEnum(fieldTerms[t], 0, null, null, field, t);
            }
            continue;
        }

        // One TermsEnum is reused for every term of this field.
        final TermsEnum termsEnum = terms.iterator(null);
        for (int t = 0; t < fieldTerms.length; ++t) {
            TermStats termStats = termStatsMatrix[field][t];
            Term term = termStats.term;
            final TermState state = termStats.termContext.get(context.ord);
            Similarity.SimScorer docScorer = similarity.simScorer(termStats.stats, context);
            DocsAndPositionsEnum postings = null;
            int docFreq = 0;
            if (state != null) { // null state means the term is missing from this segment
                termsEnum.seekExact(term.bytes(), state);
                postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_OFFSETS);
                docFreq = termsEnum.docFreq();
            }
            matchedEnumsMatrix[field][t] = new TermDocsEnum(fieldTerms[t], docFreq, postings, docScorer, field, t);
        }
    }

    FlexibleScoreModelStrategy strategy;
    try {
        strategy = (FlexibleScoreModelStrategy) LindenScoreModelStrategyBuilder
                .buildFlexibleQueryStrategy(query);
    } catch (Exception e) {
        throw new IOException(e);
    }
    strategy.preProcess(context, query.getConfig().getSchema(), query.getModel());
    strategy.setSimilarity(similarity);
    strategy.init();
    return new FlexibleScorer(this, strategy, matchedEnumsMatrix);
}

From source file:nl.inl.blacklab.search.lucene.BLSpanTermQuery.java

License:Apache License

/**
 * Overridden from SpanTermQuery to return a BLSpans instead.
 */
@Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    TermContext termContext = termContexts.get(term);
    TermState state = null;
    if (termContext != null) {
        state = termContext.get(context.ord);
    } else {
        // Happens with span-not queries: the NOT side is not included in
        // extractTerms(), so we must look the term up in this segment here.
        final Fields fields = context.reader().fields();
        final Terms terms = fields == null ? null : fields.terms(term.field());
        if (terms != null) {
            final TermsEnum lookup = terms.iterator(null);
            if (lookup.seekExact(term.bytes(), true)) {
                state = lookup.termState();
            }
        }
    }

    if (state == null) {
        // Term is not present in this reader.
        return TermSpans.EMPTY_TERM_SPANS;
    }

    // Re-seek using the cached TermState (cheap: no term dictionary lookup).
    final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
    termsEnum.seekExact(term.bytes(), state);

    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null,
            DocsAndPositionsEnum.FLAG_PAYLOADS);
    if (postings == null) {
        // Term does exist, but has no positions.
        throw new IllegalStateException("field \"" + term.field()
                + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
    }
    return new TermSpans(postings, term);
}

From source file:org.apache.blur.lucene.search.PrimeDocCache.java

License:Apache License

/**
 * Returns the prime-doc bitset for the given term and segment, building and
 * caching it on first use.
 * <p>
 * The way this method is called via warm up methods the likelihood of
 * creating multiple bitsets during a race condition is very low, that's why
 * this method is not synced.
 */
public static OpenBitSet getPrimeDocBitSet(Term primeDocTerm, AtomicReader providedReader) throws IOException {
    // Unwrap to the underlying segment reader so the core cache key is
    // stable across reader wrappers.
    AtomicReader reader = AtomicReaderUtil.getSegmentReader(providedReader);
    final Object key = reader.getCoreCacheKey();
    final Map<Object, OpenBitSet> primeDocMap = getPrimeDocMap(primeDocTerm);
    OpenBitSet bitSet = primeDocMap.get(key);
    if (bitSet == null) {
        synchronized (reader) {
            // Evict the cache entry when this segment's reader is closed.
            reader.addReaderClosedListener(new ReaderClosedListener() {
                @Override
                public void onClose(IndexReader reader) {
                    LOG.debug("Current size [" + primeDocMap.size()
                            + "] Prime Doc BitSet removing for segment [" + reader + "]");
                    OpenBitSet openBitSet = primeDocMap.remove(key);
                    if (openBitSet == null) {
                        LOG.warn("Primedoc was missing for key [{0}]", key);
                    }
                }
            });
            LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size ["
                    + primeDocMap.size() + "]");
            final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
            MemoryLeakDetector.record(bs, "PrimeDoc BitSet", key.toString());

            Fields fields = reader.fields();
            if (fields == null) {
                throw new IOException("Missing all fields.");
            }
            Terms terms = fields.terms(primeDocTerm.field());
            if (terms == null) {
                throw new IOException("Missing prime doc field [" + primeDocTerm.field() + "].");
            }
            TermsEnum termsEnum = terms.iterator(null);
            if (!termsEnum.seekExact(primeDocTerm.bytes(), true)) {
                throw new IOException("Missing prime doc term [" + primeDocTerm + "].");
            }

            // Iterate all postings (no liveDocs filter) and set one bit per doc.
            DocsEnum docsEnum = termsEnum.docs(null, null);
            int docFreq = reader.docFreq(primeDocTerm);
            int doc;
            int count = 0;
            while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                bs.fastSet(doc);
                count++;
            }
            // Only cache when the observed posting count matches docFreq.
            // NOTE(review): a mismatch presumably indicates an inconsistent
            // read of the segment — confirm before relying on this safeguard.
            if (count == docFreq) {
                primeDocMap.put(key, bs);
            } else {
                LOG.warn(
                        "PrimeDoc for reader [{0}] not stored, because count [{1}] and freq [{2}] do not match.",
                        reader, count, docFreq);
            }
            return bs;
        }
    }
    return bitSet;
}

From source file:org.apache.blur.utils.BlurUtil.java

License:Apache License

/**
 * Sets, in {@code bits}, the offsets (relative to {@code primeDocRowId}) of the
 * documents in the row range [primeDocRowId, primeDocRowId + numberOfDocsInRow)
 * that contain the given family value in the family field.
 *
 * @param bits           output bitset, indexed relative to primeDocRowId
 * @param family         family value to look up in the family field
 * @param atomicReader   segment reader to search
 * @param primeDocRowId  first doc id of the row
 * @param numberOfDocsInRow number of docs belonging to the row
 * @param liveDocs       live docs filter, may be null
 */
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
        int numberOfDocsInRow, Bits liveDocs) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) {
        // Segment has no indexed fields; nothing to apply (previously NPE'd).
        return;
    }
    Terms terms = fields.terms(BlurConstants.FAMILY);
    if (terms == null) {
        // Family field absent from this segment; nothing to apply (previously NPE'd).
        return;
    }
    TermsEnum iterator = terms.iterator(null);
    BytesRef text = new BytesRef(family);
    int lastDocId = primeDocRowId + numberOfDocsInRow;
    if (iterator.seekExact(text, true)) {
        DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
        int doc = primeDocRowId;
        // advance() past the last posting returns NO_MORE_DOCS
        // (Integer.MAX_VALUE), which terminates the loop.
        while ((doc = docs.advance(doc)) < lastDocId) {
            bits.set(doc - primeDocRowId);
        }
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.all.AllTermQuery.java

License:Apache License

/**
 * Builds the Weight for this all-field term query. When scores are not
 * needed, delegates to a plain {@link TermQuery}; otherwise returns a
 * Weight whose scorer combines the similarity score with a payload boost.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        // Payload boosts only affect scoring; a plain TermQuery matches the
        // same documents.
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    // Gather per-segment term states and collection/term statistics up front.
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) { // scorer matches the requested doc
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    // NOTE(review): payloadBoost() presumably reflects a boost
                    // stored in payloads at index time — confirm in AllTermScorer.
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(),
                            "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score,
                            "weight(" + getQuery() + " in " + doc + ") ["
                                    + similarity.getClass().getSimpleName() + "], product of:",
                            termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                // Field not present in this segment.
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            // Position the enum via the cached TermState (no dictionary re-lookup).
            termsEnum.seekExact(term.bytes(), state);
            // Payloads requested because the scorer reads payloadBoost().
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }

    };
}

From source file:org.elasticsearch.common.lucene.search.XTermsFilter.java

License:Apache License

/**
 * Collects, into a lazily-allocated bitset, every live document matching any
 * of this filter's terms. Returns null when nothing matches.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final AtomicReader reader = context.reader();
    // Lazily created: no bitset is allocated unless at least one term matches.
    FixedBitSet result = null;
    final Fields fields = reader.fields();
    final BytesRef spare = new BytesRef(this.termsBytes);
    if (fields == null) {
        return result;
    }
    TermsEnum termsEnum = null;
    DocsEnum docs = null;
    for (TermsAndField termsAndField : this.termsAndFields) {
        final Terms terms = fields.terms(termsAndField.field);
        if (terms == null) {
            continue;
        }
        termsEnum = terms.iterator(termsEnum); // this won't return null
        for (int i = termsAndField.start; i < termsAndField.end; i++) {
            spare.offset = offsets[i];
            spare.length = offsets[i + 1] - offsets[i];
            // Skip the seek cache: these one-off lookups would only pollute it.
            if (!termsEnum.seekExact(spare, false)) {
                continue;
            }
            docs = termsEnum.docs(acceptDocs, docs, 0); // no freq since we don't need them
            if (result == null) {
                // Allocate outside the hot loop, on the first matching doc only.
                if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    result = new FixedBitSet(reader.maxDoc());
                    result.set(docs.docID());
                }
            }
            while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                result.set(docs.docID());
            }
        }
    }
    return result;
}

From source file:org.kie.kieora.backend.lucene.setups.BaseLuceneSetup.java

License:Apache License

/**
 * Resolves each primary key in {@code ids} to its global doc id by probing
 * every index segment's "id" field. Entries stay -1 when the key is absent.
 * NOTE(review): assumes every segment has an "id" field — a missing field
 * would NPE in the setup loop; confirm that invariant holds.
 */
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
    final int segmentCount = subReaders.size();
    // One reusable TermsEnum/DocsEnum slot per segment.
    final TermsEnum[] termsEnums = new TermsEnum[segmentCount];
    final DocsEnum[] docsEnums = new DocsEnum[segmentCount];
    for (int seg = 0; seg < segmentCount; seg++) {
        termsEnums[seg] = subReaders.get(seg).reader().fields().terms("id").iterator(null);
    }

    final int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1; // -1 marks "not found"
    }

    for (int idx = 0; idx < ids.length; idx++) {
        final BytesRef id = new BytesRef(ids[idx]);
        int base = 0; // running doc-id offset of the current segment
        for (int seg = 0; seg < segmentCount; seg++) {
            final AtomicReader sub = subReaders.get(seg).reader();
            final TermsEnum termsEnum = termsEnums[seg];
            if (termsEnum.seekExact(id, false)) {
                final DocsEnum docs = docsEnums[seg] = termsEnum.docs(sub.getLiveDocs(), docsEnums[seg],
                        0);
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break; // primary key found; skip remaining segments
                    }
                }
            }
            base += sub.maxDoc();
        }
    }

    return results;
}