List of usage examples for org.apache.lucene.util.FixedBitSet#andNot
public void andNot(FixedBitSet other)
From source file: org.elasticsearch.common.lucene.search.XBooleanFilter.java
License: Apache License
/** * Returns the a DocIdSetIterator representing the Boolean composition * of the filters that have been added.//from w w w . j a v a 2 s . c o m */ @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { FixedBitSet res = null; final AtomicReader reader = context.reader(); // optimize single case... if (clauses.size() == 1) { FilterClause clause = clauses.get(0); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST_NOT) { if (DocIdSets.isEmpty(set)) { return new AllDocIdSet(reader.maxDoc()); } else { return new NotDocIdSet(set, reader.maxDoc()); } } // SHOULD or MUST, just return the set... if (DocIdSets.isEmpty(set)) { return null; } return set; } // first, go over and see if we can shortcut the execution // and gather Bits if we need to List<ResultClause> results = new ArrayList<ResultClause>(clauses.size()); boolean hasShouldClauses = false; boolean hasNonEmptyShouldClause = false; boolean hasMustClauses = false; boolean hasMustNotClauses = false; for (int i = 0; i < clauses.size(); i++) { FilterClause clause = clauses.get(i); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST) { hasMustClauses = true; if (DocIdSets.isEmpty(set)) { return null; } } else if (clause.getOccur() == Occur.SHOULD) { hasShouldClauses = true; if (DocIdSets.isEmpty(set)) { continue; } hasNonEmptyShouldClause = true; } else if (clause.getOccur() == Occur.MUST_NOT) { hasMustNotClauses = true; if (DocIdSets.isEmpty(set)) { // we mark empty ones as null for must_not, handle it in the next run... results.add(new ResultClause(null, null, clause)); continue; } } Bits bits = null; if (!DocIdSets.isFastIterator(set)) { bits = set.bits(); } results.add(new ResultClause(set, bits, clause)); } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } // now, go over the clauses and apply the "fast" ones first... 
hasNonEmptyShouldClause = false; boolean hasBits = false; // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs // that don't match with a must or must_not clause. List<ResultClause> fastOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { fastOrClauses.add(clause); } else if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } } else { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res.or(it); } } } } // Now we safely handle the "fast" must and must_not clauses. for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.MUST) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { res.and(it); } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs } if (clause.docIdSet != null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } } } if (!hasBits) { if (!fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = 
fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } } // we have some clauses with bits, apply them... // we let the "res" drive the computation, and check Bits for that List<ResultClause> slowOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); if (clause.bits == null) { continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { slowOrClauses.add(clause); } else { if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { continue; } hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { for (int doc = 0; doc < reader.maxDoc(); doc++) { if (!res.get(doc) && clause.bits.get(doc)) { hasNonEmptyShouldClause = true; res.set(doc); } } } } } else if (clause.clause.getOccur() == Occur.MUST) { if (res == null) { // nothing we can do, just or it... 
res = new FixedBitSet(reader.maxDoc()); DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } res.or(it); } else { Bits bits = clause.bits; // use the "res" to drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (!bits.get(doc)) { res.clear(doc); } } } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } else { Bits bits = clause.bits; // let res drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (bits.get(doc)) { res.clear(doc); } } } } } // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with // any should filter. 
TODO: Add an option to have disable minimum_should_match=1 behaviour if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } for (ResultClause slowOrClause : slowOrClauses) { if (slowOrClause.bits.get(setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } }