Example usage for org.apache.lucene.util FixedBitSet andNot

List of usage examples for org.apache.lucene.util FixedBitSet andNot

Introduction

On this page you can find example usages of org.apache.lucene.util FixedBitSet andNot.

Prototype

public void andNot(FixedBitSet other) 

Source Link

Document

this = this AND NOT other

Usage

From source file: org.elasticsearch.common.lucene.search.XBooleanFilter.java

License:Apache License

/**
 * Returns a DocIdSet representing the Boolean composition
 * of the filters that have been added.
 *
 * <p>Outline of the evaluation, as implemented below:
 * <ol>
 *   <li>A single clause is answered directly from that clause's own set (wrapped in a
 *       negation for MUST_NOT).</li>
 *   <li>Otherwise every clause's set is fetched once; an empty MUST set, or SHOULD clauses
 *       that are all empty, short-circuit to {@code null}.</li>
 *   <li>Clauses without a {@code Bits} view are combined into a {@code FixedBitSet} through
 *       their iterators; clauses with a {@code Bits} view are applied afterwards through
 *       random access.</li>
 *   <li>When SHOULD clauses are mixed with MUST / MUST_NOT clauses, every doc left in the
 *       result must additionally match at least one SHOULD clause, otherwise it is cleared.</li>
 * </ol>
 *
 * @param context    segment context; its reader supplies {@code maxDoc()} for sizing the result
 *                   and the context is handed to every clause's filter
 * @param acceptDocs passed through unchanged to every clause's filter
 * @return the combined set, or {@code null} when a MUST clause is empty, when SHOULD clauses
 *         exist but none of them contributed a match, or when there are no clauses at all
 * @throws IOException declared by the signature; presumably propagated from the filters and
 *                     iterators invoked here
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    // Accumulated result; stays null until the first clause that can seed it is applied.
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            // A lone MUST_NOT is the complement of its set; an empty set therefore means "all docs".
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // An empty MUST clause means nothing can match.
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            // Empty SHOULD clauses are dropped: they are never added to results.
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // Only ask for a Bits view when the set is not reported as a fast iterator. If bits()
        // returns null the clause is still handled together with the iterator-based clauses,
        // because the later passes only test "clause.bits != null".
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    // SHOULD clauses were present but every one of them was empty: nothing can match.
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    // The flag is reset here: from now on it records whether a SHOULD clause actually
    // contributed to (or was matched against) the result.
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Bits-backed (slow) clauses are skipped here and applied in the later Bits-based pass
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Mixed with MUST / MUST_NOT: defer, the clause is checked per doc at the end.
                fastOrClauses.add(clause);
            } else if (res == null) {
                // Pure-SHOULD filter: the first usable iterator seeds the result.
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                // Pure-SHOULD filter: union further clauses into the result.
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Bits-backed (slow) clauses are skipped here and applied in the later Bits-based pass
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            // A MUST clause without an iterator is treated as matching nothing.
            if (it == null) {
                return null;
            }
            if (res == null) {
                // First contributor: seed the result with this clause's docs.
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                // Nothing to subtract from yet: start from "all docs".
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            // docIdSet is null for the empty MUST_NOT placeholders added in the first pass.
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    // No Bits-backed clauses at all: finish here using only the iterator-based clauses.
    if (!hasBits) {
        if (!fastOrClauses.isEmpty()) {
            // fastOrClauses is only populated when a MUST / MUST_NOT clause exists, and with no
            // Bits-backed clauses each of those either seeded res above or returned null.
            DocIdSetIterator it = res.iterator();
            // Keep a doc only if at least one deferred SHOULD clause matches it.
            at_least_one_should_clause_iter: for (int setDoc = it
                    .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    // NOTE(review): iteratorMatch is defined elsewhere; presumably it positions
                    // clauseIterator at or after setDoc and reports whether it lands on setDoc - confirm.
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // No SHOULD clause matched this doc.
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Iterator-based clauses were already applied in the passes above.
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Mixed with MUST / MUST_NOT: defer to the per-doc check at the end.
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    // No result yet: fall back to the iterator to seed it.
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // Union via random access; the flag is only raised when this clause
                    // adds a doc that was not already in the result.
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    // Intersection: drop docs the MUST clause does not contain.
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                // Nothing to subtract from yet: start from "all docs" and remove via the iterator.
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    // Subtraction: drop docs the MUST_NOT clause contains.
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
    // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
    // any should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        // Both lists are only populated when a MUST / MUST_NOT clause exists; each of those has
        // seeded res by now (or the method has already returned null).
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it
                .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            // Iterator-based SHOULD clauses are tried first...
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // ...then the Bits-backed SHOULD clauses via random access.
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // No SHOULD clause matched this doc.
            res.clear(setDoc);
        }
    }

    // SHOULD clauses existed but none of them ended up matching anything: report no matches.
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }

}