Example usage for org.apache.lucene.util FixedBitSet set

List of usage examples for org.apache.lucene.util FixedBitSet set

Introduction

On this page you can find an example usage for org.apache.lucene.util FixedBitSet set.

Prototype

public void set(int startIndex, int endIndex) 

Source Link

Document

Sets a range of bits

Usage

From source file:DocIdSetBenchmark.java

License:Apache License

/**
 * Builds a random {@link FixedBitSet} of {@code numBits} bits with exactly
 * {@code numBitsSet} bits turned on.
 *
 * @param numBits    total capacity of the returned bit set
 * @param numBitsSet number of bits to set; must not exceed {@code numBits}
 * @return a bit set with exactly {@code numBitsSet} randomly chosen bits set
 */
protected static FixedBitSet randomSet(int numBits, int numBitsSet) {
    assert numBitsSet <= numBits;
    final FixedBitSet result = new FixedBitSet(numBits);
    if (numBitsSet == numBits) {
        // Fully dense: use the bulk range setter instead of sampling.
        result.set(0, numBits);
    } else {
        // Rejection sampling: keep drawing positions until an unset bit is found.
        for (int remaining = numBitsSet; remaining > 0; --remaining) {
            int candidate;
            do {
                candidate = RANDOM.nextInt(numBits);
            } while (result.get(candidate));
            result.set(candidate);
        }
    }
    return result;
}

From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License:Open Source License

/**
 * Builds the set of documents that contain at least one term in {@code this.field},
 * restricted to the accepted documents.
 * <p>
 * When {@code acceptDocs} is {@code null}, the reader's live docs are used
 * (or all documents if there are no deletions).
 *
 * @param context    leaf reader context to evaluate against
 * @param acceptDocs documents allowed to match, or {@code null} for all live docs
 * @return a {@link BitDocIdSet} over the matching documents
 * @throws IOException on index access failure
 */
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    BitSet finalBits = new SparseFixedBitSet(maxDoc);
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // No deletions: every document is live, so accept all of them.
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();

        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // Every document has the field: all accepted docs match.
                finalBits = checkBits;
            } else {
                // FIX: reuse the Terms instance fetched above instead of
                // calling reader.terms(this.field) a second redundant time.
                PostingsEnum pe = null;
                final TermsEnum te = terms.iterator(null);
                int docId;
                while (te.next() != null) {
                    pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                    while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        // getAndClear records each doc only once, even when it
                        // appears under several terms of the field.
                        if (checkBits.getAndClear(docId)) {
                            finalBits.set(docId);
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file:net.conquiris.lucene.search.NegatingFilter.java

License:Apache License

/**
 * Returns a DocIdSet matching exactly the documents NOT matched by the
 * wrapped filter.
 *
 * @param reader the index reader to evaluate against
 * @return a bit set containing every document the wrapped filter rejects
 * @throws IOException on index access failure
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final int n = reader.maxDoc();
    final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    final DocIdSet set = filter.getDocIdSet(reader);
    DocIdSetIterator inner = null;
    if (set != null && set != DocIdSet.EMPTY_DOCIDSET) {
        inner = set.iterator();
    }
    if (inner == null) {
        // The wrapped filter matched nothing, so the negation matches everything.
        bits.set(0, n);
    } else {
        // Copy the wrapped filter's matches, then invert every bit.
        bits.or(inner);
        bits.flip(0, n);
    }
    return bits;
}

From source file:org.apache.solr.search.CitationLRUCache.java

License:Apache License

/**
 * Incrementally warms this cache from the previous cache instance: rebuilds
 * the identifier-to-lucene-id mapping, marks documents that need refreshing,
 * and (when autowarming is on) regenerates the most recently used entries
 * carried over from the old cache.
 *
 * @param searcher the new searcher to warm against
 * @param old      the cache instance built for the previous searcher
 * @throws IOException on index access failure
 */
private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K, V> old) throws IOException {
    if (regenerator == null)
        return;

    Map<String, List<String>> fields = getFields(searcher, this.identifierFields);
    if (fields.get("textClasses").size() > 0 || fields.get("textClassesMV").size() > 0) {
        synchronized (map) {
            treatIdentifiersAsText = true;
        }
    }

    long warmingStartTime = System.currentTimeMillis();
    CitationLRUCache<K, V> other = (CitationLRUCache<K, V>) old;

    // collect ids of documents that need to be reloaded/regenerated during this
    // warmup run
    FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc());

    Bits liveDocs = searcher.getAtomicReader().getLiveDocs();

    if (liveDocs == null) { // everything is new; could be a fresh index or a merged/optimized one
        toRefresh.set(0, toRefresh.length());

        // Build the mapping from indexed values into lucene ids
        // this must always be available, so we build it no matter what...
        // XXX: make it update only the necessary IDs (not the whole index)
        unInvertedTheDamnThing(searcher.getAtomicReader(), fields, liveDocs, new KVSetter() {
            @SuppressWarnings("unchecked")
            @Override
            public void set(int docbase, int docid, Object value) {
                put((K) value, (V) (Integer) (docbase + docid));
            }
        });

    } else { // liveDocs is non-null here; the former 'else if (liveDocs != null)' was redundant

        Integer luceneId;
        for (V v : other.map.values()) {
            luceneId = ((Integer) v);
            // FIX: '<' instead of '<=' -- liveDocs.get(liveDocs.length()) would
            // be out of bounds for an id equal to the length.
            if (luceneId < liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated
                // TODO: retrieve all citations/references for this luceneId and
                // mark those docs to be refreshed (not yet implemented)
            }
        }

        // only documents that are still live need their entries refreshed
        for (int i = 0; i < toRefresh.length(); i++) {
            if (liveDocs.get(i)) {
                toRefresh.set(i);
            }
        }
    }

    // warm entries
    if (isAutowarmingOn()) {
        Object[] keys, vals = null;

        // Don't do the autowarming in the synchronized block, just pull out the keys and values.
        synchronized (other.map) {

            int sz = autowarm.getWarmCount(other.map.size());

            keys = new Object[sz];
            vals = new Object[sz];

            Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator();

            // iteration goes from oldest (least recently used) to most recently used,
            // so we need to skip over the oldest entries.
            int skip = other.map.size() - sz;
            for (int i = 0; i < skip; i++)
                iter.next();

            for (int i = 0; i < sz; i++) {
                Map.Entry<K, V> entry = iter.next();
                keys[i] = entry.getKey();
                vals[i] = entry.getValue();
            }
        }

        // autowarm from the oldest to the newest entries so that the ordering will be
        // correct in the new cache.
        for (int i = 0; i < keys.length; i++) {
            try {
                boolean continueRegen = true;
                if (isModified(liveDocs, keys[i], vals[i])) {
                    // entry is stale: mark its document for refresh instead of copying it
                    toRefresh.set((Integer) keys[i]);
                } else {
                    continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
                }
                if (!continueRegen)
                    break;
            } catch (Throwable e) {
                SolrException.log(log, "Error during auto-warming of key:" + keys[i], e);
            }
        }
    }

    warmupTime = System.currentTimeMillis() - warmingStartTime;
}

From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java

License:Apache License

/**
 * Duel test: AndDocIdSet over random sub-sets must agree, doc by doc, with a
 * straightforward FixedBitSet intersection of the same sets.
 */
public void testDuel() throws IOException {
    for (int round = 0; round < 1000; ++round) {
        final int setCount = 1 + random().nextInt(5);
        final int docCount = 1 + random().nextInt(1000);
        // Expected result: intersection of all random sets, seeded all-ones.
        FixedBitSet expected = new FixedBitSet(docCount);
        expected.set(0, docCount);
        final DocIdSet[] sources = new DocIdSet[setCount];
        for (int s = 0; s < setCount; ++s) {
            final FixedBitSet bits = randomBitSet(docCount);

            expected.and(bits);

            if (random().nextBoolean()) {
                // will be considered 'fast' by AndDocIdSet
                sources[s] = new BitDocIdSet(bits);
            } else {
                // will be considered 'slow' by AndDocIdSet
                sources[s] = new DocValuesDocIdSet(docCount, null) {
                    @Override
                    protected boolean matchDoc(int doc) {
                        return bits.get(doc);
                    }
                };
            }
        }
        AndDocIdSet actual = new AndDocIdSet(sources);
        Bits actualBits = actual.bits();
        if (actualBits != null) {
            // random-access view must match the expected intersection everywhere
            for (int doc = 0; doc < docCount; ++doc) {
                assertEquals(expected.get(doc), actualBits.get(doc));
            }
        }
        DocIdSetIterator it = actual.iterator();
        if (it == null) {
            // a null iterator is only legal when the intersection is empty
            assertEquals(0, expected.cardinality());
        } else {
            // iterator must emit exactly the set docs, skipping exactly the unset ones
            int last = -1;
            for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                for (int gap = last + 1; gap < doc; ++gap) {
                    assertFalse(expected.get(gap));
                }
                assertTrue(expected.get(doc));
                last = doc;
            }
            for (int gap = last + 1; gap < docCount; ++gap) {
                assertFalse(expected.get(gap));
            }
        }
    }
}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java

License:Apache License

/**
 * Returns a DocIdSet representing the Boolean composition of the filters
 * that have been added.
 * <p>
 * Evaluation strategy: a single clause is short-circuited; otherwise clauses
 * are partitioned into "fast" (iterator-driven) and "slow" (Bits-driven)
 * ones and applied in multiple passes, with SHOULD clauses enforced last
 * because at least one SHOULD clause must match per document.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // an empty MUST clause can never be satisfied: nothing matches
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // slow sets expose a Bits view; fast sets will be driven by iterator
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    // every SHOULD clause was empty, so no document can satisfy "at least one SHOULD"
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // we apply bits in based ones (slow) in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // defer SHOULD enforcement until MUST/MUST_NOT have shaped "res"
                fastOrClauses.add(clause);
            } else if (res == null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // we apply bits in based ones (slow) in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                // first MUST seeds the result set
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    if (!hasBits) {
        // no slow clauses: finish by enforcing "at least one SHOULD matches" per doc
        if (!fastOrClauses.isEmpty()) {
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter: for (int setDoc = it
                    .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // no SHOULD clause matched this doc: drop it from the result
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // union via Bits: check every doc not yet in res
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
    // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
    // any should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it
                .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // no SHOULD clause (fast or slow) matched this doc: drop it
            res.clear(setDoc);
        }
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }

}

From source file:org.elasticsearch.search.aggregations.metrics.RedBlackTreeTests.java

License:Apache License

/**
 * Random removal test: a FixedBitSet mirrors the tree's expected contents,
 * and after each removal the tree must agree with the mirror on membership,
 * size, internal consistency, and in-order iteration.
 */
public void testRemove() {
    final int numValues = scaledRandomIntBetween(200, 1000);
    // Mirror of the tree: bit i set <=> value i should still be in the tree.
    final FixedBitSet mirror = new FixedBitSet(numValues);
    mirror.set(0, numValues);
    IntRedBlackTree tree = new IntRedBlackTree();
    for (int v = 0; v < numValues; ++v) {
        tree.add(v);
    }

    final int iters = scaledRandomIntBetween(300, 1000);
    for (int iter = 0; iter < iters; ++iter) {
        final int candidate = randomInt(numValues - 1);
        final boolean removed = tree.remove(candidate);
        // remove() reports success exactly when the value was still present
        assertEquals(removed, mirror.get(candidate));
        mirror.clear(candidate);
        assertEquals(mirror.cardinality(), tree.size());
        tree.assertConsistent();
    }

    // In-order iteration must yield the surviving values, strictly increasing.
    int seen = 0;
    int previousValue = Integer.MIN_VALUE;
    for (IntCursor cursor : tree) {
        ++seen;
        final int value = tree.values[cursor.value];
        assertTrue(previousValue < value);
        assertTrue(mirror.get(value));
        previousValue = value;
    }
    assertEquals(mirror.cardinality(), seen);
}