List of usage examples for the and() method of org.apache.lucene.util.FixedBitSet
public void and(FixedBitSet other)
From source file:org.alfresco.solr.query.BitsFilter.java
License:Open Source License
/**
 * Intersects this filter's bit sets in place with the bit sets of another filter.
 *
 * <p>The bit set at each position of this filter's {@code bitSets} list is AND-ed with the
 * bit set at the same position of {@code bitsFilter.bitSets}. The other filter must hold at
 * least as many bit sets as this one, because it is indexed with this filter's positions.
 *
 * @param bitsFilter the filter whose bit sets are AND-ed into this filter's bit sets
 */
public void and(BitsFilter bitsFilter) {
    List<FixedBitSet> otherSets = bitsFilter.bitSets;
    int index = 0;
    while (index < bitSets.size()) {
        FixedBitSet target = bitSets.get(index);
        target.and(otherSets.get(index));
        index++;
    }
}
From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java
License:Apache License
public void testDuel() throws IOException { for (int iter = 0; iter < 1000; ++iter) { final int numSets = 1 + random().nextInt(5); final int numDocs = 1 + random().nextInt(1000); FixedBitSet anded = new FixedBitSet(numDocs); anded.set(0, numDocs);/*ww w . j a va2s . c o m*/ final DocIdSet[] sets = new DocIdSet[numSets]; for (int i = 0; i < numSets; ++i) { final FixedBitSet randomSet = randomBitSet(numDocs); anded.and(randomSet); if (random().nextBoolean()) { // will be considered 'fast' by AndDocIdSet sets[i] = new BitDocIdSet(randomSet); } else { // will be considered 'slow' by AndDocIdSet sets[i] = new DocValuesDocIdSet(numDocs, null) { @Override protected boolean matchDoc(int doc) { return randomSet.get(doc); } }; } } AndDocIdSet andSet = new AndDocIdSet(sets); Bits andBits = andSet.bits(); if (andBits != null) { for (int i = 0; i < numDocs; ++i) { assertEquals(anded.get(i), andBits.get(i)); } } DocIdSetIterator andIt = andSet.iterator(); if (andIt == null) { assertEquals(0, anded.cardinality()); } else { int previous = -1; for (int doc = andIt.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = andIt.nextDoc()) { for (int j = previous + 1; j < doc; ++j) { assertFalse(anded.get(j)); } assertTrue(anded.get(doc)); previous = doc; } for (int j = previous + 1; j < numDocs; ++j) { assertFalse(anded.get(j)); } } } }
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/** * Returns the a DocIdSetIterator representing the Boolean composition * of the filters that have been added./* w w w . j a va2s . co m*/ */ @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { FixedBitSet res = null; final AtomicReader reader = context.reader(); // optimize single case... if (clauses.size() == 1) { FilterClause clause = clauses.get(0); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST_NOT) { if (DocIdSets.isEmpty(set)) { return new AllDocIdSet(reader.maxDoc()); } else { return new NotDocIdSet(set, reader.maxDoc()); } } // SHOULD or MUST, just return the set... if (DocIdSets.isEmpty(set)) { return null; } return set; } // first, go over and see if we can shortcut the execution // and gather Bits if we need to List<ResultClause> results = new ArrayList<ResultClause>(clauses.size()); boolean hasShouldClauses = false; boolean hasNonEmptyShouldClause = false; boolean hasMustClauses = false; boolean hasMustNotClauses = false; for (int i = 0; i < clauses.size(); i++) { FilterClause clause = clauses.get(i); DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs); if (clause.getOccur() == Occur.MUST) { hasMustClauses = true; if (DocIdSets.isEmpty(set)) { return null; } } else if (clause.getOccur() == Occur.SHOULD) { hasShouldClauses = true; if (DocIdSets.isEmpty(set)) { continue; } hasNonEmptyShouldClause = true; } else if (clause.getOccur() == Occur.MUST_NOT) { hasMustNotClauses = true; if (DocIdSets.isEmpty(set)) { // we mark empty ones as null for must_not, handle it in the next run... results.add(new ResultClause(null, null, clause)); continue; } } Bits bits = null; if (!DocIdSets.isFastIterator(set)) { bits = set.bits(); } results.add(new ResultClause(set, bits, clause)); } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } // now, go over the clauses and apply the "fast" ones first... 
hasNonEmptyShouldClause = false; boolean hasBits = false; // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs // that don't match with a must or must_not clause. List<ResultClause> fastOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { fastOrClauses.add(clause); } else if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } } else { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { hasNonEmptyShouldClause = true; res.or(it); } } } } // Now we safely handle the "fast" must and must_not clauses. for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); // we apply bits in based ones (slow) in the second run if (clause.bits != null) { hasBits = true; continue; } if (clause.clause.getOccur() == Occur.MUST) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { res.and(it); } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs } if (clause.docIdSet != null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } } } if (!hasBits) { if (!fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = 
fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } } // we have some clauses with bits, apply them... // we let the "res" drive the computation, and check Bits for that List<ResultClause> slowOrClauses = new ArrayList<ResultClause>(); for (int i = 0; i < results.size(); i++) { ResultClause clause = results.get(i); if (clause.bits == null) { continue; } if (clause.clause.getOccur() == Occur.SHOULD) { if (hasMustClauses || hasMustNotClauses) { slowOrClauses.add(clause); } else { if (res == null) { DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { continue; } hasNonEmptyShouldClause = true; res = new FixedBitSet(reader.maxDoc()); res.or(it); } else { for (int doc = 0; doc < reader.maxDoc(); doc++) { if (!res.get(doc) && clause.bits.get(doc)) { hasNonEmptyShouldClause = true; res.set(doc); } } } } } else if (clause.clause.getOccur() == Occur.MUST) { if (res == null) { // nothing we can do, just or it... 
res = new FixedBitSet(reader.maxDoc()); DocIdSetIterator it = clause.docIdSet.iterator(); if (it == null) { return null; } res.or(it); } else { Bits bits = clause.bits; // use the "res" to drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (!bits.get(doc)) { res.clear(doc); } } } } else if (clause.clause.getOccur() == Occur.MUST_NOT) { if (res == null) { res = new FixedBitSet(reader.maxDoc()); res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs DocIdSetIterator it = clause.docIdSet.iterator(); if (it != null) { res.andNot(it); } } else { Bits bits = clause.bits; // let res drive the iteration DocIdSetIterator it = res.iterator(); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (bits.get(doc)) { res.clear(doc); } } } } } // From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there // is already a must or must_not clause. However in the current ES bool filter behaviour at least one should // clause must match in order for a doc to be a match. What we do here is checking if matched docs match with // any should filter. 
TODO: Add an option to have disable minimum_should_match=1 behaviour if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) { DocIdSetIterator it = res.iterator(); at_least_one_should_clause_iter: for (int setDoc = it .nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) { for (ResultClause fastOrClause : fastOrClauses) { DocIdSetIterator clauseIterator = fastOrClause.iterator(); if (clauseIterator == null) { continue; } if (iteratorMatch(clauseIterator, setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } for (ResultClause slowOrClause : slowOrClauses) { if (slowOrClause.bits.get(setDoc)) { hasNonEmptyShouldClause = true; continue at_least_one_should_clause_iter; } } res.clear(setDoc); } } if (hasShouldClauses && !hasNonEmptyShouldClause) { return null; } else { return res; } }
From source file:org.geotoolkit.lucene.filter.SerialChainFilter.java
License:Open Source License
@Override public DocIdSet getDocIdSet(final LeafReaderContext ctx, final Bits b) throws CorruptIndexException, IOException { final int chainSize = chain.size(); final int actionSize = actionType.length; final FixedBitSet bits = (FixedBitSet) ((BitDocIdSet) chain.get(0).getDocIdSet(ctx, b)).bits(); //if there is only an operand not we don't enter the loop int j = 0;/*from ww w. ja v a 2 s .c o m*/ if (actionType[j] == NOT) { bits.flip(0, ctx.reader().maxDoc()); j++; } for (int i = 1; i < chainSize; i++) { LogicalFilterType action; if (j < actionSize) { action = actionType[j]; j++; } else { action = DEFAULT; } final FixedBitSet nextFilterResponse = (FixedBitSet) ((BitDocIdSet) chain.get(i).getDocIdSet(ctx, b)) .bits(); //if the next operator is NOT we have to process the action before the current operand if (j < actionSize && actionType[j] == NOT) { nextFilterResponse.flip(0, ctx.reader().maxDoc()); j++; } switch (action) { case AND: bits.and(nextFilterResponse); break; case OR: bits.or(nextFilterResponse); break; case XOR: bits.xor(nextFilterResponse); break; default: bits.or(nextFilterResponse); break; } } // invalidate deleted document return invalidateDeletedDocument(bits, b); }
From source file:org.yes.cart.dao.impl.GenericDAOHibernateImpl.java
License:Apache License
/** * {@inheritDoc}// ww w . j a va 2 s. c o m */ public Map<String, List<Pair<String, Integer>>> fullTextSearchNavigation( final org.apache.lucene.search.Query query, final List<FilteredNavigationRecordRequest> facetingRequest) { if (persistentClassIndexble) { if (LOGFTQ.isDebugEnabled()) { LOGFTQ.debug("Run facet request with base query {}", query); } if (facetingRequest == null || facetingRequest.isEmpty()) { return Collections.emptyMap(); } FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.getCurrentSession()); QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(getPersistentClass()) .get(); FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(query, getPersistentClass()); fullTextQuery.setMaxResults(1); final FacetManager facetManager = fullTextQuery.getFacetManager(); boolean hasMultivalue = false; for (final FilteredNavigationRecordRequest facetingRequestItem : facetingRequest) { if (facetingRequestItem.isRangeValue()) { final FacetRangeAboveBelowContext facetCtx = qb.facet().name(facetingRequestItem.getFacetName()) .onField(facetingRequestItem.getField()).range(); final Iterator<Pair<String, String>> rageIt = facetingRequestItem.getRangeValues().iterator(); while (rageIt.hasNext()) { final Pair<String, String> range = rageIt.next(); if (rageIt.hasNext()) { facetCtx.from(range.getFirst()).to(range.getSecond()).excludeLimit(); } else { facetManager.enableFaceting(facetCtx.from(range.getFirst()).to(range.getSecond()) .orderedBy(FacetSortOrder.RANGE_DEFINITION_ODER).createFacetingRequest()); } } } else { final DiscreteFacetContext facetCtx = qb.facet().name(facetingRequestItem.getFacetName()) .onField(facetingRequestItem.getField()).discrete(); facetManager.enableFaceting( facetCtx.includeZeroCounts(facetingRequestItem.isMultiValue()).createFacetingRequest()); if (facetingRequestItem.isMultiValue()) { hasMultivalue = true; } } } final Map<String, List<Pair<String, Integer>>> out = new 
HashMap<String, List<Pair<String, Integer>>>(); IndexReader indexReader = null; FixedBitSet baseBitSet = null; try { if (hasMultivalue) { indexReader = fullTextSession.getSearchFactory().getIndexReaderAccessor() .open(getPersistentClass()); CachingWrapperFilter baseQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(query)); try { DocIdSet docIdSet = baseQueryFilter.getDocIdSet(indexReader); if (docIdSet instanceof FixedBitSet) { baseBitSet = (FixedBitSet) docIdSet; } else { baseBitSet = new FixedBitSet(1); } } catch (IOException e) { LOGFTQ.error("Unable to create base query bit set for query {} and faceting request {}", query, facetingRequest); LOGFTQ.error("Stacktrace:", e); baseBitSet = new FixedBitSet(1); } } for (final FilteredNavigationRecordRequest facetingRequestItem : facetingRequest) { final List<Pair<String, Integer>> facetsPairs = new ArrayList<Pair<String, Integer>>(); final List<Facet> facets = facetManager.getFacets(facetingRequestItem.getFacetName()); LOGFTQ.debug("Faceting request request: {}", facetingRequestItem); if (facetingRequestItem.isMultiValue() && !facetingRequestItem.isRangeValue()) { // Multivalue black magic for (final Facet facet : facets) { final org.apache.lucene.search.Query facetQuery = new TermQuery( new Term(facet.getFieldName(), facet.getValue())); try { CachingWrapperFilter filter = new CachingWrapperFilter( new QueryWrapperFilter(facetQuery)); DocIdSet docIdSet = filter.getDocIdSet(indexReader); if (docIdSet instanceof FixedBitSet) { FixedBitSet filterBitSet = (FixedBitSet) docIdSet; filterBitSet.and(baseBitSet); long count = filterBitSet.cardinality(); if (count > 0L) { LOGFTQ.debug("Has facet: {}", facet); facetsPairs.add(new Pair<String, Integer>(facet.getValue(), (int) count)); } } } catch (IOException e) { LOGFTQ.error( "Unable to create filter query bit set for query {} and faceting query {}", query, facetQuery); LOGFTQ.error("Stacktrace:", e); } } } else { // Standard discrete values and ranges for (final 
Facet facet : facets) { LOGFTQ.debug("Has facet: {}", facet); facetsPairs.add(new Pair<String, Integer>(facet.getValue(), facet.getCount())); } } out.put(facetingRequestItem.getFacetName(), facetsPairs); } } finally { if (hasMultivalue) { fullTextSession.getSearchFactory().getIndexReaderAccessor().close(indexReader); } } return out; } return Collections.emptyMap(); }