Example usage for org.apache.lucene.search DocIdSet bits

List of usage examples for org.apache.lucene.search DocIdSet bits

Introduction

This page collects example usages of the bits() method of org.apache.lucene.search.DocIdSet.

Prototype

public Bits bits() throws IOException 

Source Link

Document

Optionally provides a Bits interface for random access to matching documents.

Usage

From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java

License:Open Source License

/**
 * Get the highest document id stored in the {@link DocIdSet}.
 * <p>
 * A random-access bit set is used when one can be obtained from the
 * iterator or from {@link DocIdSet#bits()}; otherwise all ids of the set
 * are streamed and the maximum is taken.
 *
 * @param dis DocIdSet
 * @return Highest document number or {@code -1}, if there's no document
 * @throws IOException Thrown on low-level i/o-errors
 */
public static int maxDoc(@NotNull final DocIdSet dis) throws IOException {
    @Nullable
    final DocIdSetIterator disi = dis.iterator();
    if (disi == null) {
        // A null iterator means the set holds no documents, so report -1 as
        // documented (0 would wrongly claim that document 0 is present).
        return -1;
    }

    // Try the cheap random-access representations first.
    @Nullable
    BitSet bitSet = BitSetIterator.getFixedBitSetOrNull(disi);
    if (bitSet == null) {
        bitSet = BitSetIterator.getSparseFixedBitSetOrNull(disi);
    }
    if (bitSet == null) {
        bitSet = BitsUtils.bits2BitSet(dis.bits());
    }

    if (bitSet == null) {
        // No bit set available: scan the ids. max() needs no prior sorting,
        // and an empty stream maps to -1 instead of throwing.
        return StreamUtils.stream(dis).max().orElse(-1);
    }

    final int length = bitSet.length();
    if (length == 0) {
        return -1;
    }
    if (length == 1) {
        return bitSet.get(0) ? 0 : -1;
    }
    // prevSetBit yields -1 when no bit at or below the start index is set.
    return bitSet.prevSetBit(length - 1);
}

From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java

License:Open Source License

/**
 * Get a bits instance from a DocIdSet./*from   w w w.j a v  a2  s .  c  o  m*/
 *
 * @param dis Set whose bits to get
 * @return Bits or null, if no bits are set
 * @throws IOException Thrown on low-level I/O-errors
 */
@Nullable
public static BitSet bits(@NotNull final DocIdSet dis) throws IOException {
    @Nullable
    final DocIdSetIterator disi = dis.iterator();

    if (disi == null) {
        return null;
    } else {
        @Nullable
        BitSet bitSet;

        bitSet = BitSetIterator.getFixedBitSetOrNull(disi);
        if (bitSet == null) {
            bitSet = BitSetIterator.getSparseFixedBitSetOrNull(disi);
        }
        if (bitSet == null) {
            bitSet = BitsUtils.bits2BitSet(dis.bits());
        }

        if (bitSet == null) {
            bitSet = new SparseFixedBitSet(maxDoc(dis) + 1);
            StreamUtils.stream(disi).forEach(bitSet::set);
        }
        return bitSet;
    }
}

From source file:lucene.security.search.DocumentVisibilityFilter.java

License:Apache License

/**
 * Builds a {@link DocIdSet} that is the logical OR (union) of the given sets.
 * <p>
 * Every input set must expose random-access {@link Bits} and all of them
 * must report the same length. An empty list yields
 * {@link DocIdSet#EMPTY_DOCIDSET}; a single-element list yields that
 * element unchanged (after the bits check).
 *
 * @param list the sets to combine
 * @return a set matching every document matched by at least one input set
 * @throws IOException if any set has no bits, if the bits lengths differ,
 *                     or on low-level I/O errors
 */
public static DocIdSet getLogicalOr(final List<DocIdSet> list) throws IOException {
    if (list.isEmpty()) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    if (list.size() == 1) {
        DocIdSet docIdSet = list.get(0);
        Bits bits = docIdSet.bits();
        if (bits == null) {
            throw new IOException("Bits are not allowed to be null for DocIdSet [" + docIdSet + "].");
        }
        return docIdSet;
    }
    int index = 0;
    final Bits[] bitsArray = new Bits[list.size()];
    int length = -1;
    for (DocIdSet docIdSet : list) {
        Bits bits = docIdSet.bits();
        if (bits == null) {
            throw new IOException("Bits are not allowed to be null for DocIdSet [" + docIdSet + "].");
        }
        bitsArray[index] = bits;
        index++;
        if (length < 0) {
            // the first set defines the expected length
            length = bits.length();
        } else if (length != bits.length()) {
            throw new IOException(
                    "Bits length need to be the same [" + length + "] and [" + bits.length() + "]");
        }
    }
    final int len = length;
    return new DocIdSet() {

        @Override
        public Bits bits() throws IOException {
            return new Bits() {

                @Override
                public boolean get(int index) {
                    // a document matches as soon as any input set contains it
                    for (Bits candidate : bitsArray) {
                        if (candidate.get(index)) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public int length() {
                    return len;
                }

            };
        }

        @Override
        public boolean isCacheable() {
            return true;
        }

        @Override
        public DocIdSetIterator iterator() throws IOException {
            final DocIdSetIterator[] candidates = new DocIdSetIterator[list.size()];
            long c = 0;
            int count = 0;
            for (DocIdSet docIdSet : list) {
                DocIdSetIterator iterator = docIdSet.iterator();
                if (iterator == null) {
                    // DocIdSet.iterator() may return null for a set without
                    // matches; such a set contributes nothing to the union.
                    continue;
                }
                // position every sub-iterator on its first document
                iterator.nextDoc();
                candidates[count] = iterator;
                c += iterator.cost();
                count++;
            }
            if (count == 0) {
                // none of the inputs can be iterated: the union is empty
                return DocIdSet.EMPTY_DOCIDSET.iterator();
            }
            final DocIdSetIterator[] docIdSetIteratorArray = Arrays.copyOf(candidates, count);
            final long cost = c;
            return new DocIdSetIterator() {

                private int _docId = -1;

                @Override
                public int advance(int target) throws IOException {
                    callAdvanceOnAllThatAreBehind(target);
                    // NOTE(review): COMPARATOR is declared outside this block;
                    // presumably it orders iterators by ascending docID so that
                    // element 0 holds the smallest current document - confirm.
                    Arrays.sort(docIdSetIteratorArray, COMPARATOR);
                    DocIdSetIterator iterator = docIdSetIteratorArray[0];
                    return _docId = iterator.docID();
                }

                /** Moves every sub-iterator positioned before {@code target} up to it. */
                private void callAdvanceOnAllThatAreBehind(int target) throws IOException {
                    for (DocIdSetIterator iterator : docIdSetIteratorArray) {
                        if (iterator.docID() < target) {
                            iterator.advance(target);
                        }
                    }
                }

                @Override
                public int nextDoc() throws IOException {
                    return advance(_docId + 1);
                }

                @Override
                public int docID() {
                    return _docId;
                }

                @Override
                public long cost() {
                    return cost;
                }

            };
        }
    };
}

From source file:org.apache.solr.request.IntervalFacets.java

License:Apache License

/**
 * Accumulates interval counts for a string field, one index segment at a time.
 * <p>
 * For every leaf the documents accepted by the top-level filter are fed,
 * together with the field's sorted (set) doc values, into
 * {@code accumIntervalsSingle} or {@code accumIntervalsMulti}. Leaves
 * without matching documents or without doc values for the field are skipped.
 *
 * @throws IOException on low-level I/O errors
 */
private void getCountString() throws IOException {
    final Filter topFilter = docs.getTopFilter();
    for (AtomicReaderContext leafContext : searcher.getTopReaderContext().leaves()) {
        // solr docsets already exclude any deleted docs
        final DocIdSet leafDocs = topFilter.getDocIdSet(leafContext, null);
        if (leafDocs == null) {
            continue;
        }
        final DocIdSetIterator leafIterator = leafDocs.iterator();
        if (leafIterator == null) {
            continue;
        }

        if (!schemaField.multiValued()) {
            final SortedDocValues values = leafContext.reader().getSortedDocValues(schemaField.getName());
            if (values != null) {
                accumIntervalsSingle(values, leafIterator, leafDocs.bits());
            }
            continue;
        }

        final SortedSetDocValues multiValues = leafContext.reader().getSortedSetDocValues(schemaField.getName());
        if (multiValues == null) {
            continue;
        }
        final SortedDocValues unwrapped = DocValues.unwrapSingleton(multiValues);
        if (unwrapped == null) {
            accumIntervalsMulti(multiValues, leafIterator, leafDocs.bits());
        } else {
            // some codecs may optimize SORTED_SET storage for single-valued fields
            accumIntervalsSingle(unwrapped, leafIterator, leafDocs.bits());
        }
    }
}

From source file:org.apache.solr.search.Filter.java

License:Apache License

/**
 * Creates a {@link Weight} that matches the documents of this filter's
 * {@link DocIdSet} with a constant score of {@code 0}.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new Weight(this) {

        @Override
        public void extractTerms(Set<Term> terms) {
            // intentionally empty: no terms are reported
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer scorer = scorer(context);
            final boolean match = (scorer != null && scorer.iterator().advance(doc) == doc);
            if (match) {
                assert scorer.score() == 0f;
                return Explanation.match(0f, "Match on id " + doc);
            } else {
                // a document that is not matched must be explained as a non-match
                return Explanation.noMatch("No match on id " + doc);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSet set = getDocIdSet(context, null);
            if (set == null) {
                return null;
            }
            // bits() is only consulted in lazy mode, and only once
            final Bits bits = applyLazily ? set.bits() : null;
            if (bits != null) {
                // approximate with all documents and verify each one against the bits
                final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
                    @Override
                    public boolean matches() throws IOException {
                        return bits.get(approximation.docID());
                    }

                    @Override
                    public float matchCost() {
                        return 10; // TODO use cost of bits.get()
                    }
                };
                return new ConstantScoreScorer(this, 0f, twoPhase);
            }
            final DocIdSetIterator iterator = set.iterator();
            if (iterator == null) {
                return null;
            }
            return new ConstantScoreScorer(this, 0f, iterator);
        }

    };
}

From source file:org.elasticsearch.common.lucene.docset.AndDocIdSet.java

License:Apache License

/**
 * Builds an iterator over the intersection of all wrapped sets.
 * <p>
 * Sets that iterate quickly, or that offer no {@link Bits}, drive the
 * iteration; the remaining sets are only consulted through their bits.
 */
@Override
public DocIdSetIterator iterator() throws IOException {
    // Split the sets: "iterated" ones lead the iteration, the others are
    // checked through random access only.
    final List<DocIdSet> iterated = new ArrayList<DocIdSet>(sets.length);
    final List<Bits> randomAccess = new ArrayList<Bits>(sets.length);
    for (DocIdSet candidate : sets) {
        final Bits candidateBits = DocIdSets.isFastIterator(candidate) ? null : candidate.bits();
        if (candidateBits == null) {
            iterated.add(candidate);
        } else {
            randomAccess.add(candidateBits);
        }
    }

    final DocIdSet[] iteratedArray = iterated.toArray(new DocIdSet[iterated.size()]);
    if (randomAccess.isEmpty()) {
        return IteratorBasedIterator.newDocIdSetIterator(iteratedArray);
    }

    final Bits[] bitsArray = randomAccess.toArray(new Bits[randomAccess.size()]);
    if (iterated.isEmpty()) {
        return new BitsDocIdSetIterator(new AndBits(bitsArray));
    }

    // Both kinds present: iterate over the "fast" sets first and verify each
    // candidate against the more expensive bits afterwards.
    return new BitsDocIdSetIterator.FilteredIterator(
            IteratorBasedIterator.newDocIdSetIterator(iteratedArray),
            new AndBits(bitsArray));
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java

License:Apache License

/**
 * Converts a (possibly null) doc id set into a non-null {@link Bits}.
 * <p>
 * The set's own bits are preferred; otherwise its iterator is materialized
 * through {@code toFixedBitSet}. A null set or a null iterator results in
 * a {@link Bits.MatchNoBits} sized to the reader's {@code maxDoc()}.
 */
public static Bits toSafeBits(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
    if (set != null) {
        final Bits direct = set.bits();
        if (direct != null) {
            return direct;
        }
        final DocIdSetIterator docs = set.iterator();
        if (docs != null) {
            return toFixedBitSet(docs, reader.maxDoc());
        }
    }
    return new Bits.MatchNoBits(reader.maxDoc());
}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java

License:Apache License

/**
 * Returns a DocIdSet representing the Boolean composition
 * of the filters that have been added.
 * <p>
 * The clauses are evaluated in several passes: first every clause's set is
 * fetched (with early exits for empty MUST / all-empty SHOULD clauses), then
 * clauses without {@link Bits} are applied through their iterators, and
 * finally clauses that expose {@link Bits} are applied by random access.
 *
 * @param context    the segment to evaluate the clauses against
 * @param acceptDocs passed through unchanged to every clause's filter
 * @return the matching documents, or {@code null} when nothing can match
 *         (an empty MUST clause, or SHOULD clauses of which none matched)
 * @throws IOException on low-level I/O errors
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // an empty MUST clause means no document can match at all
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            // empty SHOULD clauses are dropped; they cannot contribute matches
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // only sets that are not fast to iterate are asked for their Bits
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    // the flag is reset here and set again once a SHOULD clause actually contributes
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // we apply bits in based ones (slow) in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred: checked per document once the must / must_not result exists
                fastOrClauses.add(clause);
            } else if (res == null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // we apply bits in based ones (slow) in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            // docIdSet is null for must_not clauses that were recorded as empty above
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    if (!hasBits) {
        // no clause exposes Bits: only the deferred "fast" should clauses remain to be checked
        if (!fastOrClauses.isEmpty()) {
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter: for (int setDoc = it
                    .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // no should clause matched this document, so it is removed from the result
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred: checked per document in the final pass below
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // OR the clause in by probing its bits for every document id
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
    // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
    // any should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it
                .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // neither a fast nor a slow should clause matched this document
            res.clear(setDoc);
        }
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }

}

From source file:org.elasticsearch.search.fetch.matchedqueries.MatchedQueriesFetchSubPhase.java

License:Apache License

/**
 * Appends to {@code matchedQueries} the name of every named filter whose
 * doc id set contains the current hit.
 * <p>
 * Random-access bits are used when the set offers them; otherwise the
 * set's iterator is advanced to the hit's doc id. An {@link IOException}
 * raised while evaluating a filter is ignored for that filter.
 *
 * @param hitContext             the hit being inspected
 * @param namedFiltersAndQueries filters keyed by their name
 * @param matchedQueries         receives the names of matching filters
 */
private void addMatchedQueries(HitContext hitContext, ImmutableMap<String, Filter> namedFiltersAndQueries,
        List<String> matchedQueries) {
    for (Map.Entry<String, Filter> namedFilter : namedFiltersAndQueries.entrySet()) {
        final String queryName = namedFilter.getKey();
        final Filter namedQuery = namedFilter.getValue();
        try {
            // null is fine, since we filter by hitContext.docId()
            final DocIdSet candidates = namedQuery.getDocIdSet(hitContext.readerContext(), null);
            if (DocIdSets.isEmpty(candidates)) {
                continue;
            }

            final boolean hitMatches;
            final Bits randomAccess = candidates.bits();
            if (randomAccess != null) {
                hitMatches = randomAccess.get(hitContext.docId());
            } else {
                final DocIdSetIterator docs = candidates.iterator();
                hitMatches = docs != null && docs.advance(hitContext.docId()) == hitContext.docId();
            }

            if (hitMatches) {
                matchedQueries.add(queryName);
            }
        } catch (IOException ignored) {
            // ignore
        } finally {
            SearchContext.current().clearReleasables();
        }
    }
}

From source file:org.meresco.lucene.queries.KeyFilter.java

License:Open Source License

/**
 * Creates a filter from the random-access bits of the given key set.
 *
 * @param keySet  set whose {@code bits()} result is stored in this filter
 * @param keyName name stored alongside the bits
 * @throws IOException if obtaining the bits fails
 */
public KeyFilter(DocIdSet keySet, String keyName) throws IOException {
    this.keyName = keyName;
    // NOTE(review): DocIdSet.bits() is optional and may return null - confirm
    // that users of this field cope with a null value.
    this.keySet = keySet.bits();
}