Example usage for org.apache.lucene.util FixedBitSet get

List of usage examples for the get method of org.apache.lucene.util.FixedBitSet

Introduction

On this page you can find example usages of the get method of org.apache.lucene.util.FixedBitSet.

Prototype

@Override
    public boolean get(int index) 

Source Link

Usage

From source file:DocIdSetBenchmark.java

License:Apache License

/**
 * Creates a bit set of {@code numBits} bits in which {@code numBitsSet}
 * distinct positions, drawn from the shared {@code RANDOM} source, are set.
 *
 * @param numBits    size of the bit set to create
 * @param numBitsSet number of bits to set; asserted to be at most {@code numBits}
 * @return the populated bit set
 */
protected static FixedBitSet randomSet(int numBits, int numBitsSet) {
    assert numBitsSet <= numBits;
    final FixedBitSet bits = new FixedBitSet(numBits);
    if (numBitsSet == numBits) {
        // Every position is requested: fill the whole range in one call.
        bits.set(0, numBits);
        return bits;
    }
    // Rejection sampling: keep drawing until enough previously-clear bits were hit.
    int remaining = numBitsSet;
    while (remaining > 0) {
        final int candidate = RANDOM.nextInt(numBits);
        if (bits.get(candidate)) {
            continue; // already set, draw again
        }
        bits.set(candidate);
        remaining--;
    }
    return bits;
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java

License:Open Source License

/**
 * Drops every term whose frequency reported by the data provider is not
 * positive, logging the dropped terms as a warning.
 * <p>
 * When nothing has to be dropped (including the empty-input case) the very
 * same {@code terms} instance is returned. Otherwise a new array is built;
 * surviving terms are appended while walking from the last index down to the
 * first, so they appear in reverse of their original order.
 *
 * @param dataProv provider queried for the frequency of each term
 * @param terms terms to check against the collection
 * @return {@code terms} itself if all terms are known, else a new array
 * holding only the known terms
 */
@SuppressFBWarnings("LO_APPENDED_STRING_IN_FORMAT_STRING")
private static BytesRefArray removeUnknownTerms(@NotNull final IndexDataProvider dataProv,
        @NotNull final BytesRefArray terms) {
    final int termCount = terms.size();
    if (termCount == 0) {
        return terms;
    }

    final StringBuilder skippedMsg = new StringBuilder("Skipped terms (stopword or not in collection): [");
    final FixedBitSet unknown = new FixedBitSet(termCount);
    final BytesRefBuilder scratch = new BytesRefBuilder();

    // Pass 1: flag each term the provider reports no occurrences for.
    int idx = termCount;
    while (--idx >= 0) {
        final BytesRef candidate = terms.get(scratch, idx);
        if (dataProv.getTermFrequency(candidate) <= 0L) {
            skippedMsg.append(candidate.utf8ToString()).append(' ');
            unknown.set(idx);
        }
    }

    if (unknown.cardinality() <= 0) {
        // Nothing was flagged, hand back the input untouched.
        return terms;
    }

    LOG.warn(skippedMsg.toString().trim() + "].");

    // Pass 2: collect the terms that were not flagged.
    final BytesRefArray cleanTerms = new BytesRefArray(Counter.newCounter(false));
    for (int pos = termCount - 1; pos >= 0; pos--) {
        if (unknown.get(pos)) {
            continue;
        }
        // the original author notes that append() copies the bytes
        cleanTerms.append(terms.get(scratch, pos));
    }
    return cleanTerms;
}

From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License:Open Source License

/**
 * Checks that {@code BitsUtils.bits2BitSet} yields a bit set with the same
 * cardinality and the same value at every index as the source bit set.
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    final int size = 11;
    final FixedBitSet source = new FixedBitSet(size);
    for (final int bit : new int[] {1, 3, 6, 7, 8, 10}) {
        source.set(bit);
    }

    final BitSet converted = BitsUtils.bits2BitSet(source);

    Assert.assertEquals("Bit count mismatch.", source.cardinality(), converted.cardinality());
    int idx = 0;
    while (idx < size) {
        Assert.assertEquals("Bits mismatch.", source.get(idx), converted.get(idx));
        idx++;
    }
}

From source file:org.apache.solr.search.DocSetBuilder.java

License:Apache License

/**
 * Compacts the first {@code length} entries of {@code arr} in place: a value
 * is kept only if it differs from the last value kept and, when
 * {@code acceptDocs} is non-null, its bit is set there.
 * <p>
 * NOTE(review): the original carried a commented-out
 * {@code assert value >= previous}, so the input is presumably expected to
 * be sorted ascending; the comparison starts from -1.
 *
 * @param arr        values to compact; overwritten from index 0 onwards
 * @param length     number of leading entries of {@code arr} to examine
 * @param acceptDocs optional filter, {@code null} accepts every value
 * @return number of entries kept, i.e. the new logical length
 */
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
    int write = 0;
    int lastKept = -1;
    for (int read = 0; read < length; ++read) {
        final int candidate = arr[read];
        if (candidate == lastKept) {
            continue; // repeat of the value just written
        }
        if (acceptDocs != null && !acceptDocs.get(candidate)) {
            continue; // rejected by the filter
        }
        arr[write++] = candidate;
        lastKept = candidate;
    }
    return write;
}

From source file:org.elasticsearch.action.search.CanMatchPreFilterSearchPhase.java

License:Apache License

/**
 * Resets each shard iterator according to the possible-match bit at its
 * position: a set bit calls {@code reset()}, a clear bit calls
 * {@code resetAndSkip()}.
 *
 * @param results   supplies the possible-match bit set and its match count
 * @param shardsIts shard iterators, visited in iteration order
 * @return the same {@code shardsIts} instance after all iterators were reset
 */
private GroupShardsIterator<SearchShardIterator> getIterator(BitSetSearchPhaseResults results,
        GroupShardsIterator<SearchShardIterator> shardsIts) {
    final int matchCount = results.getNumPossibleMatches();
    final FixedBitSet matches = results.getPossibleMatches();
    if (matchCount == 0) {
        // Special case noted by the original author: with no hit at all, at least one search
        // response is still needed to produce a valid search result with all the aggs etc.,
        // so bit 0 is forced on.
        matches.set(0);
    }
    int shardIndex = 0;
    for (SearchShardIterator shardIt : shardsIts) {
        final boolean mayMatch = matches.get(shardIndex);
        shardIndex++;
        if (!mayMatch) {
            shardIt.resetAndSkip();
        } else {
            shardIt.reset();
        }
    }
    return shardsIts;
}

From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java

License:Apache License

/**
 * Randomized duel: intersects several random bit sets with
 * {@code FixedBitSet.and} as the reference and checks that
 * {@code AndDocIdSet} over the same inputs agrees, both through its
 * {@code Bits} view (when available) and through its iterator.
 */
public void testDuel() throws IOException {
    for (int round = 0; round < 1000; ++round) {
        final int setCount = 1 + random().nextInt(5);
        final int maxDoc = 1 + random().nextInt(1000);

        // Reference result: start with every bit set, then AND each input into it.
        final FixedBitSet expected = new FixedBitSet(maxDoc);
        expected.set(0, maxDoc);

        final DocIdSet[] inputs = new DocIdSet[setCount];
        for (int slot = 0; slot < setCount; ++slot) {
            final FixedBitSet member = randomBitSet(maxDoc);
            expected.and(member);

            final boolean wrapAsBitSet = random().nextBoolean();
            if (!wrapAsBitSet) {
                // will be considered 'slow' by AndDocIdSet
                inputs[slot] = new DocValuesDocIdSet(maxDoc, null) {
                    @Override
                    protected boolean matchDoc(int doc) {
                        return member.get(doc);
                    }
                };
            } else {
                // will be considered 'fast' by AndDocIdSet
                inputs[slot] = new BitDocIdSet(member);
            }
        }

        final AndDocIdSet conjunction = new AndDocIdSet(inputs);

        // Random-access view, if offered, must match bit for bit.
        final Bits randomAccess = conjunction.bits();
        if (randomAccess != null) {
            int docId = 0;
            while (docId < maxDoc) {
                assertEquals(expected.get(docId), randomAccess.get(docId));
                docId++;
            }
        }

        final DocIdSetIterator iterator = conjunction.iterator();
        if (iterator == null) {
            // A missing iterator is only acceptable for an empty intersection.
            assertEquals(0, expected.cardinality());
            continue;
        }

        // Every returned doc must be set; every doc skipped in between must be clear.
        int lastSeen = -1;
        int current;
        while ((current = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (int gap = lastSeen + 1; gap < current; ++gap) {
                assertFalse(expected.get(gap));
            }
            assertTrue(expected.get(current));
            lastSeen = current;
        }
        for (int tail = lastSeen + 1; tail < maxDoc; ++tail) {
            assertFalse(expected.get(tail));
        }
    }
}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java

License:Apache License

/**
 * Returns a DocIdSet representing the Boolean composition of the filter
 * clauses that have been added.
 * <p>
 * Outline of the implementation below:
 * <ol>
 * <li>A single clause is answered directly from that clause's own set.</li>
 * <li>Otherwise every clause's set is fetched once and classified; sets that
 * are not reported as fast iterators contribute their {@code Bits}
 * view.</li>
 * <li>Clauses without {@code Bits} are combined into a {@link FixedBitSet}
 * via iterator-based or/and/andNot.</li>
 * <li>Clauses with {@code Bits} are applied afterwards, mostly by walking
 * the intermediate result and probing the bits.</li>
 * <li>SHOULD clauses that coexist with MUST / MUST_NOT clauses are checked
 * last: a doc stays in the result only if at least one of them matches.</li>
 * </ol>
 *
 * @param context    supplies the reader whose {@code maxDoc()} sizes the result
 * @param acceptDocs handed unchanged to each clause filter's {@code getDocIdSet}
 * @return the composed set; {@code null} when a MUST clause is empty or when
 *         SHOULD clauses exist but none contributed a match
 * @throws IOException declared by this method; presumably raised by the
 *         underlying filters or iterators
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            // a lone MUST_NOT is the complement of its set (everything, if the set is empty)
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // an empty MUST clause means nothing can match
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            // empty SHOULD clauses are dropped, they are not recorded in results
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // only sets that are not fast iterators are asked for their Bits view
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    // SHOULD clauses were present, but every one of them was empty
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    // (the flag is reset here and from now on tracks whether a SHOULD clause actually contributed)
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in a later pass
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred: checked against the result once MUST / MUST_NOT were applied
                fastOrClauses.add(clause);
            } else if (res == null) {
                // first contributing SHOULD clause allocates the result
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in a later pass
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                // first clause to touch the result: seed it with this clause's docs
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            // docIdSet is null for MUST_NOT clauses recorded as empty in the first pass
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    // no bits-based clauses at all: finish here
    if (!hasBits) {
        if (!fastOrClauses.isEmpty()) {
            // keep a doc only if at least one deferred SHOULD clause matches it
            // (iteratorMatch is defined elsewhere; presumably it positions the iterator on setDoc)
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter: for (int setDoc = it
                    .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // no SHOULD clause matched this doc
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // clauses without bits were already handled above
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred to the final at-least-one-should check
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // OR via random access: probe every doc id not yet in the result
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
    // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
    // any should filter. TODO: Add an option to disable the minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it
                .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            // iterator-based SHOULD clauses are tried first ...
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // ... then the bits-based ones
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // no SHOULD clause matched this doc
            res.clear(setDoc);
        }
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }

}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java

License:Apache License

/**
 * Builds the same six clauses (two MUST, two SHOULD, two MUST_NOT, all
 * created with the last {@code newFilterClause} argument {@code false}) in
 * three different clause orders and expects the same outcome from each:
 * exactly docs 0 and 1 match.
 */
@Test
public void testWithTwoClausesOfEachOccur_allFixedBitsetFilters() throws Exception {
    List<XBooleanFilter> permutations = new ArrayList<XBooleanFilter>();

    // order: MUST, SHOULD, MUST_NOT
    XBooleanFilter mustFirst = createBooleanFilter(
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false));
    permutations.add(mustFirst);

    // order: MUST_NOT, MUST, SHOULD
    XBooleanFilter mustNotFirst = createBooleanFilter(
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false));
    permutations.add(mustNotFirst);

    // order: SHOULD, MUST_NOT, MUST
    XBooleanFilter shouldFirst = createBooleanFilter(
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false));
    permutations.add(shouldFirst);

    for (XBooleanFilter candidate : permutations) {
        FixedBitSet collected = new FixedBitSet(reader.maxDoc());
        collected.or(candidate.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
        assertThat(collected.cardinality(), equalTo(2));
        assertThat(collected.get(0), equalTo(true));
        assertThat(collected.get(1), equalTo(true));
        assertThat(collected.get(2), equalTo(false));
    }
}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java

License:Apache License

/**
 * Builds the same six clauses (two MUST, two SHOULD, two MUST_NOT, all
 * created with the last {@code newFilterClause} argument {@code true}) in
 * three different clause orders and expects the same outcome from each:
 * exactly docs 0 and 1 match.
 */
@Test
public void testWithTwoClausesOfEachOccur_allBitsBasedFilters() throws Exception {
    List<XBooleanFilter> permutations = new ArrayList<XBooleanFilter>();

    // order: MUST, SHOULD, MUST_NOT
    XBooleanFilter mustFirst = createBooleanFilter(
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true),
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true),
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true));
    permutations.add(mustFirst);

    // order: MUST_NOT, MUST, SHOULD
    XBooleanFilter mustNotFirst = createBooleanFilter(
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true),
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true),
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true));
    permutations.add(mustNotFirst);

    // order: SHOULD, MUST_NOT, MUST
    XBooleanFilter shouldFirst = createBooleanFilter(
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true),
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true),
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true));
    permutations.add(shouldFirst);

    for (XBooleanFilter candidate : permutations) {
        FixedBitSet collected = new FixedBitSet(reader.maxDoc());
        collected.or(candidate.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
        assertThat(collected.cardinality(), equalTo(2));
        assertThat(collected.get(0), equalTo(true));
        assertThat(collected.get(1), equalTo(true));
        assertThat(collected.get(2), equalTo(false));
    }
}

From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java

License:Apache License

/**
 * Mixes both clause flavours (last {@code newFilterClause} argument
 * alternating between {@code true} and {@code false}) across two MUST, two
 * SHOULD and two MUST_NOT clauses. Two rounds are run, the second with the
 * flags flipped; each round tries three clause orders and expects exactly
 * docs 0 and 1 to match.
 */
@Test
public void testWithTwoClausesOfEachOccur_allFilterTypes() throws Exception {
    List<XBooleanFilter> permutations = new ArrayList<XBooleanFilter>();

    // Round 1: first clause of each pair uses 'true', second uses 'false'.
    XBooleanFilter mustFirst = createBooleanFilter(
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false));
    permutations.add(mustFirst);
    XBooleanFilter mustNotFirst = createBooleanFilter(
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false));
    permutations.add(mustNotFirst);
    XBooleanFilter shouldFirst = createBooleanFilter(
            newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false));
    permutations.add(shouldFirst);

    for (XBooleanFilter candidate : permutations) {
        FixedBitSet collected = new FixedBitSet(reader.maxDoc());
        collected.or(candidate.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
        assertThat(collected.cardinality(), equalTo(2));
        assertThat(collected.get(0), equalTo(true));
        assertThat(collected.get(1), equalTo(true));
        assertThat(collected.get(2), equalTo(false));
    }

    // Round 2: same clause orders with the flags flipped.
    permutations.clear();
    XBooleanFilter mustFirstFlipped = createBooleanFilter(
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true));
    permutations.add(mustFirstFlipped);
    XBooleanFilter mustNotFirstFlipped = createBooleanFilter(
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true));
    permutations.add(mustNotFirstFlipped);
    XBooleanFilter shouldFirstFlipped = createBooleanFilter(
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true));
    permutations.add(shouldFirstFlipped);

    for (XBooleanFilter candidate : permutations) {
        FixedBitSet collected = new FixedBitSet(reader.maxDoc());
        collected.or(candidate.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
        assertThat(collected.cardinality(), equalTo(2));
        assertThat(collected.get(0), equalTo(true));
        assertThat(collected.get(1), equalTo(true));
        assertThat(collected.get(2), equalTo(false));
    }
}