Usage examples for org.apache.lucene.index.TermsEnum.seekExact
public abstract void seekExact(BytesRef term, TermState state) throws IOException;
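All of the snippets below follow the same pattern: obtain a TermsEnum for a field, position it exactly on a term, then pull postings from the positioned enum. Two overloads appear: boolean seekExact(BytesRef) probes whether the term exists at all, while seekExact(BytesRef, TermState) repositions the enum from a previously captured state without another term-dictionary lookup (some older snippets pass an extra useCache boolean that later Lucene releases removed). As a minimal sketch of the idiom, assuming the Lucene 4.x-era API used in most examples below; reader, field, and text are hypothetical caller-supplied values, not part of the API:

// Sketch only: locate a term, capture its state, and re-seek cheaply later.
Terms terms = reader.terms(field);
if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    if (termsEnum.seekExact(new BytesRef(text))) {
        // The enum is now positioned on the term; capture its state so another
        // enum over the same segment can jump back without a dictionary scan.
        TermState state = termsEnum.termState();
        TermsEnum fresh = terms.iterator(null);
        fresh.seekExact(new BytesRef(text), state); // cheap re-seek, no lookup
        DocsEnum docs = fresh.docs(null, null);     // postings for the term
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // process doc...
        }
    }
}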
From source file: com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java
License: Apache License
@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FlexibleQuery.FlexibleTerm[][] termMatrix = query.getTerms();
    TermDocsEnum[][] matchedEnumsMatrix = new TermDocsEnum[termMatrix.length][];
    for (int i = 0; i < termMatrix.length; ++i) {
        FlexibleQuery.FlexibleTerm[] fieldTerms = termMatrix[i];
        matchedEnumsMatrix[i] = new TermDocsEnum[fieldTerms.length];
        if (fieldTerms.length == 0) {
            return null;
        }
        // Reuse single TermsEnum below:
        Terms terms = context.reader().terms(fieldTerms[0].term.field());
        if (terms == null) {
            for (int j = 0; j < fieldTerms.length; ++j) {
                matchedEnumsMatrix[i][j] = new TermDocsEnum(fieldTerms[j], 0, null, null, i, j);
            }
        } else {
            final TermsEnum termsEnum = terms.iterator(null);
            for (int j = 0; j < fieldTerms.length; ++j) {
                TermStats termStats = termStatsMatrix[i][j];
                Term term = termStats.term;
                final TermState state = termStats.termContext.get(context.ord);
                Similarity.SimScorer docScorer = similarity.simScorer(termStats.stats, context);
                DocsAndPositionsEnum postings = null;
                int docFreq = 0;
                if (state != null) {
                    termsEnum.seekExact(term.bytes(), state);
                    postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_OFFSETS);
                    docFreq = termsEnum.docFreq();
                }
                matchedEnumsMatrix[i][j] = new TermDocsEnum(fieldTerms[j], docFreq, postings, docScorer, i, j);
            }
        }
    }
    FlexibleScoreModelStrategy strategy;
    try {
        strategy = (FlexibleScoreModelStrategy) LindenScoreModelStrategyBuilder.buildFlexibleQueryStrategy(query);
    } catch (Exception e) {
        throw new IOException(e);
    }
    strategy.preProcess(context, query.getConfig().getSchema(), query.getModel());
    strategy.setSimilarity(similarity);
    strategy.init();
    return new FlexibleScorer(this, strategy, matchedEnumsMatrix);
}
From source file: nl.inl.blacklab.search.lucene.BLSpanTermQuery.java
License: Apache License
/**
 * Overridden from SpanTermQuery to return a BLSpans instead.
 */
@Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    TermContext termContext = termContexts.get(term);
    final TermState state;
    if (termContext == null) {
        // This happens with a span-not query, which doesn't include the NOT
        // side in extractTerms(), so we have to seek to the term ourselves in
        // this segment.
        final Fields fields = context.reader().fields();
        if (fields != null) {
            final Terms terms = fields.terms(term.field());
            if (terms != null) {
                final TermsEnum termsEnum = terms.iterator(null);
                if (termsEnum.seekExact(term.bytes(), true)) {
                    state = termsEnum.termState();
                } else {
                    state = null;
                }
            } else {
                state = null;
            }
        } else {
            state = null;
        }
    } else {
        state = termContext.get(context.ord);
    }
    if (state == null) {
        // Term is not present in this reader
        return TermSpans.EMPTY_TERM_SPANS;
    }
    final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
    termsEnum.seekExact(term.bytes(), state);
    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null,
            DocsAndPositionsEnum.FLAG_PAYLOADS);
    if (postings != null) {
        return new TermSpans(postings, term);
    }
    // Term does exist, but has no positions
    throw new IllegalStateException("field \"" + term.field()
            + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
}
From source file: org.apache.blur.lucene.search.PrimeDocCache.java
License: Apache License
/**
 * Given the way this method is called via warm-up methods, the likelihood of
 * creating multiple bitsets during a race condition is very low; that's why
 * this method is not synced.
 */
public static OpenBitSet getPrimeDocBitSet(Term primeDocTerm, AtomicReader providedReader) throws IOException {
    AtomicReader reader = AtomicReaderUtil.getSegmentReader(providedReader);
    final Object key = reader.getCoreCacheKey();
    final Map<Object, OpenBitSet> primeDocMap = getPrimeDocMap(primeDocTerm);
    OpenBitSet bitSet = primeDocMap.get(key);
    if (bitSet == null) {
        synchronized (reader) {
            reader.addReaderClosedListener(new ReaderClosedListener() {
                @Override
                public void onClose(IndexReader reader) {
                    LOG.debug("Current size [" + primeDocMap.size() + "] Prime Doc BitSet removing for segment ["
                            + reader + "]");
                    OpenBitSet openBitSet = primeDocMap.remove(key);
                    if (openBitSet == null) {
                        LOG.warn("Primedoc was missing for key [{0}]", key);
                    }
                }
            });
            LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size [" + primeDocMap.size()
                    + "]");
            final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
            MemoryLeakDetector.record(bs, "PrimeDoc BitSet", key.toString());
            Fields fields = reader.fields();
            if (fields == null) {
                throw new IOException("Missing all fields.");
            }
            Terms terms = fields.terms(primeDocTerm.field());
            if (terms == null) {
                throw new IOException("Missing prime doc field [" + primeDocTerm.field() + "].");
            }
            TermsEnum termsEnum = terms.iterator(null);
            if (!termsEnum.seekExact(primeDocTerm.bytes(), true)) {
                throw new IOException("Missing prime doc term [" + primeDocTerm + "].");
            }
            DocsEnum docsEnum = termsEnum.docs(null, null);
            int docFreq = reader.docFreq(primeDocTerm);
            int doc;
            int count = 0;
            while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                bs.fastSet(doc);
                count++;
            }
            if (count == docFreq) {
                primeDocMap.put(key, bs);
            } else {
                LOG.warn("PrimeDoc for reader [{0}] not stored, because count [{1}] and freq [{2}] do not match.",
                        reader, count, docFreq);
            }
            return bs;
        }
    }
    return bitSet;
}
From source file: org.apache.blur.utils.BlurUtil.java
License: Apache License
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
        int numberOfDocsInRow, Bits liveDocs) throws IOException {
    Fields fields = atomicReader.fields();
    Terms terms = fields.terms(BlurConstants.FAMILY);
    TermsEnum iterator = terms.iterator(null);
    BytesRef text = new BytesRef(family);
    int lastDocId = primeDocRowId + numberOfDocsInRow;
    if (iterator.seekExact(text, true)) {
        DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
        int doc = primeDocRowId;
        while ((doc = docs.advance(doc)) < lastDocId) {
            bits.set(doc - primeDocRowId);
        }
    }
}
From source file: org.codelibs.elasticsearch.common.lucene.all.AllTermQuery.java
License: Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(),
                            "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score,
                            "weight(" + getQuery() + " in " + doc + ") ["
                                    + similarity.getClass().getSimpleName() + "], product of:",
                            termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            termsEnum.seekExact(term.bytes(), state);
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }
    };
}
From source file: org.elasticsearch.common.lucene.search.XTermsFilter.java
License: Apache License
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final AtomicReader reader = context.reader();
    FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
    final Fields fields = reader.fields();
    final BytesRef spare = new BytesRef(this.termsBytes);
    if (fields == null) {
        return result;
    }
    Terms terms = null;
    TermsEnum termsEnum = null;
    DocsEnum docs = null;
    for (TermsAndField termsAndField : this.termsAndFields) {
        if ((terms = fields.terms(termsAndField.field)) != null) {
            termsEnum = terms.iterator(termsEnum); // this won't return null
            for (int i = termsAndField.start; i < termsAndField.end; i++) {
                spare.offset = offsets[i];
                spare.length = offsets[i + 1] - offsets[i];
                if (termsEnum.seekExact(spare, false)) { // don't use the cache since we could easily pollute it here
                    docs = termsEnum.docs(acceptDocs, docs, 0); // no freqs since we don't need them
                    if (result == null) {
                        if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            // lazy init, but don't do it in the hot loop since we could read many docs
                            result = new FixedBitSet(reader.maxDoc());
                            result.set(docs.docID());
                        }
                    }
                    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        result.set(docs.docID());
                    }
                }
            }
        }
    }
    return result;
}
From source file: org.kie.kieora.backend.lucene.setups.BaseLuceneSetup.java
License: Apache License
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
    }
    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final AtomicReader sub = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            if (termsEnum.seekExact(id, false)) {
                final DocsEnum docs = docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0);
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += sub.maxDoc();
        }
    }
    return results;
}