List of usage examples for org.apache.lucene.search.DocIdSet#bits()
public Bits bits() throws IOException
From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java
License:Open Source License
/**
 * Get the highest document id stored in the {@link DocIdSet}.
 *
 * @param dis DocIdSet
 * @return Highest document number or {@code -1}, if there's no document
 * @throws IOException Thrown on low-level i/o-errors
 */
public static int maxDoc(@NotNull final DocIdSet dis) throws IOException {
    @Nullable final DocIdSetIterator disi = dis.iterator();
    if (disi == null) {
        // No iterator means no documents: report -1 as documented. Returning 0
        // would wrongly claim that document 0 is part of the set.
        return -1;
    }

    // Try the cheap routes first: unwrap a bit set backing the iterator, then
    // fall back to converting the random-access bits of the set.
    @Nullable BitSet bitSet = BitSetIterator.getFixedBitSetOrNull(disi);
    if (bitSet == null) {
        bitSet = BitSetIterator.getSparseFixedBitSetOrNull(disi);
    }
    if (bitSet == null) {
        bitSet = BitsUtils.bits2BitSet(dis.bits());
    }

    if (bitSet == null) {
        // Last resort: walk all ids. max() needs no prior sorting, and an empty
        // stream yields -1 instead of throwing NoSuchElementException.
        return StreamUtils.stream(dis).max().orElse(-1);
    }

    final int length = bitSet.length();
    if (length == 0) {
        return -1;
    }
    if (length == 1) {
        return bitSet.get(0) ? 0 : -1;
    }
    return bitSet.prevSetBit(length - 1);
}
From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java
License:Open Source License
/** * Get a bits instance from a DocIdSet./*from w w w.j a v a2 s . c o m*/ * * @param dis Set whose bits to get * @return Bits or null, if no bits are set * @throws IOException Thrown on low-level I/O-errors */ @Nullable public static BitSet bits(@NotNull final DocIdSet dis) throws IOException { @Nullable final DocIdSetIterator disi = dis.iterator(); if (disi == null) { return null; } else { @Nullable BitSet bitSet; bitSet = BitSetIterator.getFixedBitSetOrNull(disi); if (bitSet == null) { bitSet = BitSetIterator.getSparseFixedBitSetOrNull(disi); } if (bitSet == null) { bitSet = BitsUtils.bits2BitSet(dis.bits()); } if (bitSet == null) { bitSet = new SparseFixedBitSet(maxDoc(dis) + 1); StreamUtils.stream(disi).forEach(bitSet::set); } return bitSet; } }
From source file:lucene.security.search.DocumentVisibilityFilter.java
License:Apache License
/**
 * Builds a {@link DocIdSet} that matches every document matched by at least
 * one of the given sets (logical OR).
 *
 * <p>An empty list yields {@code DocIdSet.EMPTY_DOCIDSET}; a single-element
 * list yields that element unchanged (after checking that it exposes bits).
 *
 * @param list the sets to combine; each must provide non-null {@code bits()}
 *             and all bits must report the same length
 * @return the combined set
 * @throws IOException if a set has no bits, if the bits lengths differ, or on
 *                     low-level I/O errors
 */
public static DocIdSet getLogicalOr(final List<DocIdSet> list) throws IOException {
    if (list.size() == 0) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    if (list.size() == 1) {
        DocIdSet docIdSet = list.get(0);
        Bits bits = docIdSet.bits();
        if (bits == null) {
            throw new IOException("Bits are not allowed to be null for DocIdSet [" + docIdSet + "].");
        }
        return docIdSet;
    }

    int index = 0;
    final Bits[] bitsArray = new Bits[list.size()];
    int length = -1;
    for (DocIdSet docIdSet : list) {
        Bits bits = docIdSet.bits();
        if (bits == null) {
            throw new IOException("Bits are not allowed to be null for DocIdSet [" + docIdSet + "].");
        }
        bitsArray[index] = bits;
        index++;
        if (length < 0) {
            length = bits.length();
        } else if (length != bits.length()) {
            throw new IOException(
                    "Bits length need to be the same [" + length + "] and [" + bits.length() + "]");
        }
    }
    final int len = length;

    return new DocIdSet() {

        @Override
        public Bits bits() throws IOException {
            return new Bits() {
                @Override
                public boolean get(int index) {
                    // A document matches as soon as any of the underlying bits has it set.
                    for (Bits candidate : bitsArray) {
                        if (candidate.get(index)) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public int length() {
                    return len;
                }
            };
        }

        @Override
        public boolean isCacheable() {
            return true;
        }

        @Override
        public DocIdSetIterator iterator() throws IOException {
            final DocIdSetIterator[] docIdSetIteratorArray = new DocIdSetIterator[list.size()];
            long costSum = 0;
            int filled = 0;
            for (DocIdSet docIdSet : list) {
                DocIdSetIterator iterator = docIdSet.iterator();
                if (iterator == null) {
                    // DocIdSet.iterator() may return null for a set without
                    // documents; such a set contributes nothing to the union.
                    continue;
                }
                // Position every sub-iterator on its first document up front.
                iterator.nextDoc();
                docIdSetIteratorArray[filled] = iterator;
                costSum += iterator.cost();
                filled++;
            }
            final long cost = costSum;
            final int activeCount = filled;

            return new DocIdSetIterator() {
                private int _docId = -1;

                @Override
                public int advance(int target) throws IOException {
                    if (activeCount == 0) {
                        // None of the sets supplied an iterator: nothing can match.
                        return _docId = DocIdSetIterator.NO_MORE_DOCS;
                    }
                    callAdvanceOnAllThatAreBehind(target);
                    // Only the populated prefix is sorted; the first entry then
                    // supplies the document id to return.
                    Arrays.sort(docIdSetIteratorArray, 0, activeCount, COMPARATOR);
                    DocIdSetIterator iterator = docIdSetIteratorArray[0];
                    return _docId = iterator.docID();
                }

                private void callAdvanceOnAllThatAreBehind(int target) throws IOException {
                    for (int i = 0; i < activeCount; i++) {
                        DocIdSetIterator iterator = docIdSetIteratorArray[i];
                        if (iterator.docID() < target) {
                            iterator.advance(target);
                        }
                    }
                }

                @Override
                public int nextDoc() throws IOException {
                    return advance(_docId + 1);
                }

                @Override
                public int docID() {
                    return _docId;
                }

                @Override
                public long cost() {
                    return cost;
                }
            };
        }
    };
}
From source file:org.apache.solr.request.IntervalFacets.java
License:Apache License
private void getCountString() throws IOException { Filter filter = docs.getTopFilter(); List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves(); for (int subIndex = 0; subIndex < leaves.size(); subIndex++) { AtomicReaderContext leaf = leaves.get(subIndex); DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs if (dis == null) { continue; }/*from w w w . j a va2 s .c o m*/ DocIdSetIterator disi = dis.iterator(); if (disi != null) { if (schemaField.multiValued()) { SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(schemaField.getName()); if (sub == null) { continue; } final SortedDocValues singleton = DocValues.unwrapSingleton(sub); if (singleton != null) { // some codecs may optimize SORTED_SET storage for single-valued fields accumIntervalsSingle(singleton, disi, dis.bits()); } else { accumIntervalsMulti(sub, disi, dis.bits()); } } else { SortedDocValues sub = leaf.reader().getSortedDocValues(schemaField.getName()); if (sub == null) { continue; } accumIntervalsSingle(sub, disi, dis.bits()); } } } }
From source file:org.apache.solr.search.Filter.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { return new Weight(this) { @Override/* ww w . j ava2s .c o m*/ public void extractTerms(Set<Term> terms) { } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { final Scorer scorer = scorer(context); final boolean match = (scorer != null && scorer.iterator().advance(doc) == doc); if (match) { assert scorer.score() == 0f; return Explanation.match(0f, "Match on id " + doc); } else { return Explanation.match(0f, "No match on id " + doc); } } @Override public Scorer scorer(LeafReaderContext context) throws IOException { final DocIdSet set = getDocIdSet(context, null); if (set == null) { return null; } if (applyLazily && set.bits() != null) { final Bits bits = set.bits(); final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc()); final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { return bits.get(approximation.docID()); } @Override public float matchCost() { return 10; // TODO use cost of bits.get() } }; return new ConstantScoreScorer(this, 0f, twoPhase); } final DocIdSetIterator iterator = set.iterator(); if (iterator == null) { return null; } return new ConstantScoreScorer(this, 0f, iterator); } }; }
From source file:org.elasticsearch.common.lucene.docset.AndDocIdSet.java
License:Apache License
@Override public DocIdSetIterator iterator() throws IOException { // we try and be smart here, if we can iterate through docsets quickly, prefer to iterate // over them as much as possible, before actually going to "bits" based ones to check List<DocIdSet> iterators = new ArrayList<DocIdSet>(sets.length); List<Bits> bits = new ArrayList<Bits>(sets.length); for (DocIdSet set : sets) { if (DocIdSets.isFastIterator(set)) { iterators.add(set);/*from w ww.ja v a 2 s.c om*/ } else { Bits bit = set.bits(); if (bit != null) { bits.add(bit); } else { iterators.add(set); } } } if (bits.isEmpty()) { return IteratorBasedIterator.newDocIdSetIterator(iterators.toArray(new DocIdSet[iterators.size()])); } if (iterators.isEmpty()) { return new BitsDocIdSetIterator(new AndBits(bits.toArray(new Bits[bits.size()]))); } // combination of both..., first iterating over the "fast" ones, and then checking on the more // expensive ones return new BitsDocIdSetIterator.FilteredIterator( IteratorBasedIterator.newDocIdSetIterator(iterators.toArray(new DocIdSet[iterators.size()])), new AndBits(bits.toArray(new Bits[bits.size()]))); }
From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java
License:Apache License
/**
 * Converts a set into a never-null {@link Bits} view.
 *
 * <p>The set's own bits are returned when available. Otherwise its iterator
 * is converted through {@code toFixedBitSet}. A {@code null} set, or a set
 * without an iterator, results in a {@code Bits.MatchNoBits} sized to the
 * reader's {@code maxDoc()}.
 *
 * @param reader reader supplying {@code maxDoc()}
 * @param set    the set to convert, may be {@code null}
 * @return bits for the set, never {@code null}
 * @throws IOException on low-level I/O errors
 */
public static Bits toSafeBits(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
    if (set != null) {
        final Bits randomAccess = set.bits();
        if (randomAccess != null) {
            return randomAccess;
        }
        final DocIdSetIterator docs = set.iterator();
        if (docs != null) {
            return toFixedBitSet(docs, reader.maxDoc());
        }
    }
    return new Bits.MatchNoBits(reader.maxDoc());
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/** * Returns the a DocIdSetIterator representing the Boolean composition * of the filters that have been added./*from ww w . j a va2 s . c o m*/ */ @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { FixedBitSet res = null; final AtomicReader reader = context.reader(); // optimize single case... if (clauses.size() == 1) { FilterClause clause = clauses.get(0); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST_NOT) { if (DocIdSets.isEmpty(set)) { return new AllDocIdSet(reader.maxDoc()); } else { return new NotDocIdSet(set, reader.maxDoc()); } } // SHOULD or MUST, just return the set... if (DocIdSets.isEmpty(set)) { return null; } return set; } // first, go over and see if we can shortcut the execution // and gather Bits if we need to List<ResultClause> results = new ArrayList<ResultClause>(clauses.size()); boolean hasShouldClauses = false; boolean hasNonEmptyShouldClause = false; boolean hasMustClauses = false; boolean hasMustNotClauses = false; for (int i = 0; i < clauses.size(); i++) { FilterClause clause = clauses.get(i); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST) { hasMustClauses = true; if (DocIdSets.isEmpty(set)) { return null; } } else if (clause.getOccur() == Occur.SHOULD) { hasShouldClauses = true; if (DocIdSets.isEmpty(set)) { continue; } hasNonEmptyShouldClause = true; } else if (clause.getOccur() == Occur.MUST_NOT) { hasMustNotClauses = true; if (DocIdSets.isEmpty(set)) { // we mark empty ones as null for must_not, handle it in the next run... results.add(new ResultClause(null, null, clause)); continue; } } Bits bits = null; if (!DocIdSets.isFastIterator(set)) { bits = set.bits(); } results.add(new ResultClause(set, bits, clause)); } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } // now, go over the clauses and apply the "fast" ones first... 
hasNonEmptyShouldClause = false; boolean hasBits = false; // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs // that don't match with a must or must_not clause. List<ResultClause> fastOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { fastOrClauses.add(clause); } else if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } } else { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res.or(it); } } } } // Now we safely handle the "fast" must and must_not clauses. for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.MUST) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { res.and(it); } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs } if (clause.docIdSet != null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } } } if (!hasBits) { if (!fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = 
fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } } // we have some clauses with bits, apply them... // we let the "res" drive the computation, and check Bits for that List<ResultClause> slowOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); if (clause.bits == null) { continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { slowOrClauses.add(clause); } else { if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { continue; } hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { for (int doc = 0; doc < reader.maxDoc(); doc++) { if (!res.get(doc) && clause.bits.get(doc)) { hasNonEmptyShouldClause = true; res.set(doc); } } } } } else if (clause.clause.getOccur() == Occur.MUST) { if (res == null) { // nothing we can do, just or it... 
res = new FixedBitSet(reader.maxDoc()); DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } res.or(it); } else { Bits bits = clause.bits; // use the "res" to drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (!bits.get(doc)) { res.clear(doc); } } } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } else { Bits bits = clause.bits; // let res drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (bits.get(doc)) { res.clear(doc); } } } } } // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with // any should filter. 
TODO: Add an option to have disable minimum_should_match=1 behaviour if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } for (ResultClause slowOrClause : slowOrClauses) { if (slowOrClause.bits.get(setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } }
From source file:org.elasticsearch.search.fetch.matchedqueries.MatchedQueriesFetchSubPhase.java
License:Apache License
private void addMatchedQueries(HitContext hitContext, ImmutableMap<String, Filter> namedFiltersAndQueries, List<String> matchedQueries) { for (Map.Entry<String, Filter> entry : namedFiltersAndQueries.entrySet()) { String name = entry.getKey(); Filter filter = entry.getValue(); try {/*from ww w .j a v a2 s . c om*/ DocIdSet docIdSet = filter.getDocIdSet(hitContext.readerContext(), null); // null is fine, since we filter by hitContext.docId() if (!DocIdSets.isEmpty(docIdSet)) { Bits bits = docIdSet.bits(); if (bits != null) { if (bits.get(hitContext.docId())) { matchedQueries.add(name); } } else { DocIdSetIterator iterator = docIdSet.iterator(); if (iterator != null) { if (iterator.advance(hitContext.docId()) == hitContext.docId()) { matchedQueries.add(name); } } } } } catch (IOException e) { // ignore } finally { SearchContext.current().clearReleasables(); } } }
From source file:org.meresco.lucene.queries.KeyFilter.java
License:Open Source License
/**
 * Creates a filter for the given key field, backed by the bits of the given
 * key set.
 *
 * @param keySet  set whose {@code bits()} are stored as the key set
 * @param keyName name stored as the key name
 * @throws IOException if obtaining the bits fails
 */
public KeyFilter(DocIdSet keySet, String keyName) throws IOException {
    this.keyName = keyName;
    // NOTE(review): DocIdSet.bits() may return null for sets without random
    // access; presumably callers only pass sets that support it - confirm.
    this.keySet = keySet.bits();
}