Usage examples for org.apache.lucene.util.FixedBitSet#get(int)
@Override
public boolean get(int index)
From source file:DocIdSetBenchmark.java
License:Apache License
protected static FixedBitSet randomSet(int numBits, int numBitsSet) { assert numBitsSet <= numBits; final FixedBitSet set = new FixedBitSet(numBits); if (numBitsSet == numBits) { set.set(0, numBits);//from w w w . j a v a2s.com } else { for (int i = 0; i < numBitsSet; ++i) { while (true) { final int o = RANDOM.nextInt(numBits); if (!set.get(o)) { set.set(o); break; } } } } return set; }
From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java
License:Open Source License
/** * Remove terms from the given collection, if they are not found in the * collection.//from w w w .jav a2 s. co m * * @param dataProv IndexDataProvider * @param terms Collection of terms to check against the collection * @return Passed in terms with non-collection terms removed */ @SuppressFBWarnings("LO_APPENDED_STRING_IN_FORMAT_STRING") private static BytesRefArray removeUnknownTerms(@NotNull final IndexDataProvider dataProv, @NotNull final BytesRefArray terms) { final StringBuilder sb = new StringBuilder("Skipped terms (stopword or not in collection): ["); final FixedBitSet bits = new FixedBitSet(terms.size()); final BytesRefBuilder spare = new BytesRefBuilder(); BytesRef term; if (terms.size() == 0) { return terms; } else { for (int i = terms.size() - 1; i >= 0; i--) { term = terms.get(spare, i); if (dataProv.getTermFrequency(term) <= 0L) { sb.append(term.utf8ToString()).append(' '); bits.set(i); } } if (bits.cardinality() > 0) { LOG.warn(sb.toString().trim() + "]."); final BytesRefArray cleanTerms = new BytesRefArray(Counter.newCounter(false)); for (int i = terms.size() - 1; i >= 0; i--) { if (!bits.get(i)) { term = terms.get(spare, i); cleanTerms.append(term); // copies bytes } } return cleanTerms; } return terms; } }
From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java
License:Open Source License
/**
 * Converts a small {@link FixedBitSet} with a known bit pattern through
 * {@code BitsUtils.bits2BitSet} and checks that cardinality and every single bit
 * survive the conversion.
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    final int size = 11;
    final FixedBitSet source = new FixedBitSet(size);
    for (final int setIdx : new int[] {1, 3, 6, 7, 8, 10}) {
        source.set(setIdx);
    }

    final BitSet converted = BitsUtils.bits2BitSet(source);

    Assert.assertEquals("Bit count mismatch.", source.cardinality(), converted.cardinality());
    int pos = 0;
    while (pos < size) {
        Assert.assertEquals("Bits mismatch.", source.get(pos), converted.get(pos));
        pos++;
    }
}
From source file:org.apache.solr.search.DocSetBuilder.java
License:Apache License
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) { int pos = 0;/*from w ww. j av a2 s. c o m*/ int previous = -1; for (int i = 0; i < length; ++i) { final int value = arr[i]; // assert value >= previous; if (value != previous && (acceptDocs == null || acceptDocs.get(value))) { arr[pos++] = value; previous = value; } } return pos; }
From source file:org.elasticsearch.action.search.CanMatchPreFilterSearchPhase.java
License:Apache License
private GroupShardsIterator<SearchShardIterator> getIterator(BitSetSearchPhaseResults results, GroupShardsIterator<SearchShardIterator> shardsIts) { int cardinality = results.getNumPossibleMatches(); FixedBitSet possibleMatches = results.getPossibleMatches(); if (cardinality == 0) { // this is a special case where we have no hit but we need to get at least one search response in order // to produce a valid search result with all the aggs etc. possibleMatches.set(0);// w w w .j a va 2s. c o m } int i = 0; for (SearchShardIterator iter : shardsIts) { if (possibleMatches.get(i++)) { iter.reset(); } else { iter.resetAndSkip(); } } return shardsIts; }
From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java
License:Apache License
public void testDuel() throws IOException { for (int iter = 0; iter < 1000; ++iter) { final int numSets = 1 + random().nextInt(5); final int numDocs = 1 + random().nextInt(1000); FixedBitSet anded = new FixedBitSet(numDocs); anded.set(0, numDocs);//ww w .ja v a 2 s.co m final DocIdSet[] sets = new DocIdSet[numSets]; for (int i = 0; i < numSets; ++i) { final FixedBitSet randomSet = randomBitSet(numDocs); anded.and(randomSet); if (random().nextBoolean()) { // will be considered 'fast' by AndDocIdSet sets[i] = new BitDocIdSet(randomSet); } else { // will be considered 'slow' by AndDocIdSet sets[i] = new DocValuesDocIdSet(numDocs, null) { @Override protected boolean matchDoc(int doc) { return randomSet.get(doc); } }; } } AndDocIdSet andSet = new AndDocIdSet(sets); Bits andBits = andSet.bits(); if (andBits != null) { for (int i = 0; i < numDocs; ++i) { assertEquals(anded.get(i), andBits.get(i)); } } DocIdSetIterator andIt = andSet.iterator(); if (andIt == null) { assertEquals(0, anded.cardinality()); } else { int previous = -1; for (int doc = andIt.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = andIt.nextDoc()) { for (int j = previous + 1; j < doc; ++j) { assertFalse(anded.get(j)); } assertTrue(anded.get(doc)); previous = doc; } for (int j = previous + 1; j < numDocs; ++j) { assertFalse(anded.get(j)); } } } }
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/**
 * Returns a {@link DocIdSet} representing the Boolean composition of the filters
 * that have been added.
 *
 * <p>The result is built in several passes over the clauses: a shortcut for a single
 * clause, a pass that evaluates every clause's doc id set and short-circuits where
 * possible, a pass over clauses that have no {@code Bits} ("fast" clauses), and a
 * pass over clauses that have {@code Bits} ("slow" clauses). When MUST or MUST_NOT
 * clauses are present, SHOULD clauses are applied last as an "at least one should
 * clause must match" check on the accumulated result.
 *
 * @param context    reader context the filters are evaluated against
 * @param acceptDocs passed through unchanged to each clause's filter
 * @return the composed set, or {@code null} in the cases where the method determines
 *         that nothing can match (e.g. an empty MUST clause, or SHOULD clauses
 *         present but none of them non-empty)
 * @throws IOException declared for the filter and iterator calls made here
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            // negating an empty set yields all docs up to maxDoc
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // an empty MUST clause means nothing can match
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            // empty SHOULD clauses are dropped and never reach `results`
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // Bits are only fetched for sets that are not reported as fast iterators
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    // (the flag is reset here and recomputed by the passes below)
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred: checked against the accumulated result at the end
                fastOrClauses.add(clause);
            } else if (res == null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            // docIdSet is null for the empty must_not clauses recorded in the first pass
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    if (!hasBits) {
        // no bits-based clauses at all: finish with the deferred fast should clauses
        if (!fastOrClauses.isEmpty()) {
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter: for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    // NOTE(review): iteratorMatch is defined elsewhere; presumably it reports
                    // whether the clause iterator matches setDoc - confirm against its definition
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // no should clause matched this doc, so it is removed from the result
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // clauses without bits were already handled by the fast passes above
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // deferred: checked against the accumulated result at the end
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // OR the clause in by probing its Bits for every doc not yet set
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
    // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
    // any should filter.
    // TODO: Add an option to disable the minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            // fast should clauses are tried first, then the bits-based ones
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // no should clause matched this doc, so it is removed from the result
            res.clear(setDoc);
        }
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java
License:Apache License
/**
 * Builds the same six clauses (two MUST, two SHOULD, two MUST_NOT) in three
 * different clause orders, every clause created with its last flag set to
 * {@code false}, and expects each filter to match exactly docs 0 and 1.
 * NOTE(review): judging by the test name, {@code false} presumably selects a
 * FixedBitSet-based filter - confirm against {@code newFilterClause}.
 */
@Test
public void testWithTwoClausesOfEachOccur_allFixedBitsetFilters() throws Exception {
    List<XBooleanFilter> orderings = new ArrayList<XBooleanFilter>();
    // MUST, SHOULD, MUST_NOT
    orderings.add(createBooleanFilter(
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false)));
    // MUST_NOT, MUST, SHOULD
    orderings.add(createBooleanFilter(
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false),
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false)));
    // SHOULD, MUST_NOT, MUST
    orderings.add(createBooleanFilter(
            newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, false),
            newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, false),
            newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, false)));

    for (int idx = 0; idx < orderings.size(); idx++) {
        XBooleanFilter filter = orderings.get(idx);
        FixedBitSet matched = new FixedBitSet(reader.maxDoc());
        matched.or(filter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
        assertThat(matched.cardinality(), equalTo(2));
        assertThat(matched.get(0), equalTo(true));
        assertThat(matched.get(1), equalTo(true));
        assertThat(matched.get(2), equalTo(false));
    }
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java
License:Apache License
@Test public void testWithTwoClausesOfEachOccur_allBitsBasedFilters() throws Exception { List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>(); booleanFilters// ww w .ja v a 2s . c o m .add(createBooleanFilter(newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true), newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true), newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true))); booleanFilters.add(createBooleanFilter(newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true), newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true), newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true))); booleanFilters.add( createBooleanFilter(newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, true), newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, true), newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, true))); for (XBooleanFilter booleanFilter : booleanFilters) { FixedBitSet result = new FixedBitSet(reader.maxDoc()); result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); assertThat(result.cardinality(), equalTo(2)); assertThat(result.get(0), equalTo(true)); assertThat(result.get(1), equalTo(true)); assertThat(result.get(2), equalTo(false)); } }
From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java
License:Apache License
@Test public void testWithTwoClausesOfEachOccur_allFilterTypes() throws Exception { List<XBooleanFilter> booleanFilters = new ArrayList<XBooleanFilter>(); booleanFilters//from w w w . j a v a 2s.com .add(createBooleanFilter(newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false), newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false), newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false))); booleanFilters.add(createBooleanFilter(newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false), newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false), newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false))); booleanFilters.add( createBooleanFilter(newFilterClause(2, 'c', SHOULD, true), newFilterClause(3, 'd', SHOULD, false), newFilterClause(4, 'e', MUST_NOT, true), newFilterClause(5, 'f', MUST_NOT, false), newFilterClause(0, 'a', MUST, true), newFilterClause(1, 'b', MUST, false))); for (XBooleanFilter booleanFilter : booleanFilters) { FixedBitSet result = new FixedBitSet(reader.maxDoc()); result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); assertThat(result.cardinality(), equalTo(2)); assertThat(result.get(0), equalTo(true)); assertThat(result.get(1), equalTo(true)); assertThat(result.get(2), equalTo(false)); } booleanFilters.clear(); booleanFilters .add(createBooleanFilter(newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true), newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true), newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true))); booleanFilters.add(createBooleanFilter(newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true), newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true), newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true))); booleanFilters.add( 
createBooleanFilter(newFilterClause(2, 'c', SHOULD, false), newFilterClause(3, 'd', SHOULD, true), newFilterClause(4, 'e', MUST_NOT, false), newFilterClause(5, 'f', MUST_NOT, true), newFilterClause(0, 'a', MUST, false), newFilterClause(1, 'b', MUST, true))); for (XBooleanFilter booleanFilter : booleanFilters) { FixedBitSet result = new FixedBitSet(reader.maxDoc()); result.or(booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator()); assertThat(result.cardinality(), equalTo(2)); assertThat(result.get(0), equalTo(true)); assertThat(result.get(1), equalTo(true)); assertThat(result.get(2), equalTo(false)); } }