List of usage examples for org.apache.lucene.util FixedBitSet set
public void set(int startIndex, int endIndex)
From source file:DocIdSetBenchmark.java
License:Apache License
protected static FixedBitSet randomSet(int numBits, int numBitsSet) { assert numBitsSet <= numBits; final FixedBitSet set = new FixedBitSet(numBits); if (numBitsSet == numBits) { set.set(0, numBits); } else {//from ww w . j a va 2 s. c o m for (int i = 0; i < numBitsSet; ++i) { while (true) { final int o = RANDOM.nextInt(numBits); if (!set.get(o)) { set.set(o); break; } } } } return set; }
From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java
License:Open Source License
@Override public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs) throws IOException { FixedBitSet checkBits; final LeafReader reader = context.reader(); final int maxDoc = reader.maxDoc(); BitSet finalBits = new SparseFixedBitSet(maxDoc); if (acceptDocs == null) { checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs()); if (checkBits == null) { // all live checkBits = new FixedBitSet(maxDoc); checkBits.set(0, checkBits.length()); }/* w ww . jav a 2 s .c o m*/ } else { checkBits = BitsUtils.bits2FixedBitSet(acceptDocs); } @Nullable final Terms terms = reader.terms(this.field); if (terms != null) { final int termsDocCount = terms.getDocCount(); if (termsDocCount != 0) { if (termsDocCount == maxDoc) { // all matching finalBits = checkBits; } else { @Nullable final Terms t = reader.terms(this.field); if (t != null) { PostingsEnum pe = null; final TermsEnum te = t.iterator(null); int docId; while (te.next() != null) { pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE); while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (checkBits.getAndClear(docId)) { finalBits.set(docId); } } } } } } } return new BitDocIdSet(finalBits); }
From source file:net.conquiris.lucene.search.NegatingFilter.java
License:Apache License
@Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { final int n = reader.maxDoc(); final FixedBitSet bits = new FixedBitSet(reader.maxDoc()); final DocIdSet set = filter.getDocIdSet(reader); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { bits.set(0, n); } else {//from w w w . j a va2 s .c o m DocIdSetIterator i = set.iterator(); if (i == null) { bits.set(0, n); } else { bits.or(i); bits.flip(0, n); } } return bits; }
From source file:org.apache.solr.search.CitationLRUCache.java
License:Apache License
/**
 * Incrementally warms this cache from a previous searcher's cache instance.
 *
 * Rebuilds the identifier-to-lucene-id mapping (fully when the index has no
 * deletions recorded, i.e. fresh or fully merged) and autowarms entries from
 * the old cache, marking stale entries in {@code toRefresh}.
 *
 * @param searcher the new searcher being warmed
 * @param old      the cache attached to the previous searcher
 * @throws IOException on index access failure
 */
private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K, V> old) throws IOException {
    if (regenerator == null)
        return;

    Map<String, List<String>> fields = getFields(searcher, this.identifierFields);
    // Presence of any text-typed identifier field switches the whole cache
    // into text-key mode; guarded by the map monitor like other state writes.
    if (fields.get("textClasses").size() > 0 || fields.get("textClassesMV").size() > 0) {
        synchronized (map) {
            treatIdentifiersAsText = true;
        }
    }

    long warmingStartTime = System.currentTimeMillis();
    CitationLRUCache<K, V> other = (CitationLRUCache<K, V>) old;

    // Docs whose cache entries must be reloaded/regenerated during this run.
    FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc());

    Bits liveDocs = searcher.getAtomicReader().getLiveDocs();
    if (liveDocs == null) {
        // No deletions recorded: could be a fresh index or a merged/optimized
        // one, so force regeneration of everything.
        toRefresh.set(0, toRefresh.length());
        // Build the mapping from indexed identifier values to lucene doc ids;
        // this must always be available, so it is rebuilt unconditionally here.
        // XXX: make it update only the necessary IDs (not the whole index)
        unInvertedTheDamnThing(searcher.getAtomicReader(), fields, liveDocs, new KVSetter() {
            @SuppressWarnings("unchecked")
            @Override
            public void set(int docbase, int docid, Object value) {
                put((K) value, (V) (Integer) (docbase + docid));
            }
        });
    } else if (liveDocs != null) {
        // NOTE(review): this `else if` condition is always true after the
        // `liveDocs == null` branch above; a plain `else` would be clearer.
        Integer luceneId;
        for (V v : other.map.values()) {
            luceneId = ((Integer) v);
            // NOTE(review): `<=` allows luceneId == liveDocs.length(), in which
            // case liveDocs.get(luceneId) would be out of range — confirm this
            // should be `<`. The body is currently effect-free: detected
            // deleted/updated docs are meant to have their citation/reference
            // neighbors marked for refresh, but that is not implemented yet.
            if (luceneId <= liveDocs.length() && !liveDocs.get(luceneId)) {
                // doc was either deleted or updated
                // retrieve all citations/references for this luceneId and mark
                // these docs to be refreshed
            }
        }
        // Mark every live doc for refresh.
        for (int i = 0; i < toRefresh.length(); i++) {
            if (liveDocs.get(i)) {
                toRefresh.set(i);
            }
        }
    }

    // warm entries
    if (isAutowarmingOn()) {
        Object[] keys, vals = null;

        // Don't autowarm inside the synchronized block: just snapshot the
        // most-recently-used `sz` keys and values.
        synchronized (other.map) {
            int sz = autowarm.getWarmCount(other.map.size());
            keys = new Object[sz];
            vals = new Object[sz];
            Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator();
            // Iteration goes from oldest (least recently used) to most recently
            // used, so skip over the oldest entries.
            int skip = other.map.size() - sz;
            for (int i = 0; i < skip; i++)
                iter.next();
            for (int i = 0; i < sz; i++) {
                Map.Entry<K, V> entry = iter.next();
                keys[i] = entry.getKey();
                vals[i] = entry.getValue();
            }
        }

        // Autowarm from the oldest to the newest entries so that the LRU
        // ordering is reproduced in the new cache.
        for (int i = 0; i < keys.length; i++) {
            try {
                boolean continueRegen = true;
                if (isModified(liveDocs, keys[i], vals[i])) {
                    // NOTE(review): assumes the key is (castable to) the lucene
                    // doc id in this mode — confirm against treatIdentifiersAsText.
                    toRefresh.set((Integer) keys[i]);
                } else {
                    continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
                }
                if (!continueRegen)
                    break;
            } catch (Throwable e) {
                // Warming is best-effort: log and continue with the next key.
                SolrException.log(log, "Error during auto-warming of key:" + keys[i], e);
            }
        }
    }

    warmupTime = System.currentTimeMillis() - warmingStartTime;
}
From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java
License:Apache License
public void testDuel() throws IOException { for (int iter = 0; iter < 1000; ++iter) { final int numSets = 1 + random().nextInt(5); final int numDocs = 1 + random().nextInt(1000); FixedBitSet anded = new FixedBitSet(numDocs); anded.set(0, numDocs); final DocIdSet[] sets = new DocIdSet[numSets]; for (int i = 0; i < numSets; ++i) { final FixedBitSet randomSet = randomBitSet(numDocs); anded.and(randomSet);//from w ww .ja v a 2 s .c o m if (random().nextBoolean()) { // will be considered 'fast' by AndDocIdSet sets[i] = new BitDocIdSet(randomSet); } else { // will be considered 'slow' by AndDocIdSet sets[i] = new DocValuesDocIdSet(numDocs, null) { @Override protected boolean matchDoc(int doc) { return randomSet.get(doc); } }; } } AndDocIdSet andSet = new AndDocIdSet(sets); Bits andBits = andSet.bits(); if (andBits != null) { for (int i = 0; i < numDocs; ++i) { assertEquals(anded.get(i), andBits.get(i)); } } DocIdSetIterator andIt = andSet.iterator(); if (andIt == null) { assertEquals(0, anded.cardinality()); } else { int previous = -1; for (int doc = andIt.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = andIt.nextDoc()) { for (int j = previous + 1; j < doc; ++j) { assertFalse(anded.get(j)); } assertTrue(anded.get(doc)); previous = doc; } for (int j = previous + 1; j < numDocs; ++j) { assertFalse(anded.get(j)); } } } }
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/**
 * Returns a DocIdSet representing the Boolean composition
 * of the filters that have been added.
 *
 * Strategy: resolve every clause once, then apply "fast" (iterator-friendly)
 * clauses first and Bits-backed "slow" clauses second, letting the running
 * result {@code res} drive iteration wherever possible. SHOULD clauses are
 * deferred when MUST/MUST_NOT clauses exist, and enforced at the end as a
 * minimum_should_match=1 check.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // Single-clause fast path: no composition needed.
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            if (DocIdSets.isEmpty(set)) {
                // Negating nothing matches everything.
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // First pass: resolve all clauses, shortcut where possible, and gather
    // Bits for sets that are not fast to iterate.
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // An empty MUST clause makes the whole conjunction empty.
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        // All SHOULD clauses were empty: nothing can satisfy min_should_match.
        return null;
    }

    // Second pass: apply the "fast" clauses first. Reset the SHOULD tracker;
    // it is re-derived below from clauses that actually contribute docs.
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // Handle "fast" SHOULD clauses first; otherwise a should clause could
    // unset docs that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Bits-based (slow) clauses are applied in the second run.
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Defer: enforced as min_should_match after MUST/MUST_NOT.
                fastOrClauses.add(clause);
            } else if (res == null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now safely handle the "fast" MUST and MUST_NOT clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Bits-based (slow) clauses are applied in the second run.
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    if (!hasBits) {
        // Only fast clauses exist; enforce min_should_match over the deferred
        // SHOULD clauses by clearing docs that match none of them.
        if (!fastOrClauses.isEmpty()) {
            // res is non-null here: fastOrClauses is only populated when a
            // MUST/MUST_NOT clause exists, and those branches initialize res.
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter:
            for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                res.clear(setDoc);
            }
        }
        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // Some clauses have Bits; apply them letting "res" drive the computation
    // and checking Bits per doc.
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Defer: enforced as min_should_match at the end.
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // Full scan: add docs the Bits accept that res lacks.
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // Use "res" to drive the iteration; drop docs the Bits reject.
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // Let res drive the iteration; drop docs the Bits accept.
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean-logic point of view a SHOULD clause has no impact on a
    // bool filter if there is already a MUST or MUST_NOT clause. However, in
    // the current ES bool filter behaviour at least one SHOULD clause must
    // match for a doc to be a match, so check matched docs against every
    // deferred SHOULD filter here.
    // TODO: Add an option to disable the minimum_should_match=1 behaviour.
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter:
        for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            res.clear(setDoc);
        }
    }
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }
}
From source file:org.elasticsearch.search.aggregations.metrics.RedBlackTreeTests.java
License:Apache License
/**
 * Verifies {@code IntRedBlackTree.remove} against a {@link FixedBitSet}
 * reference model: random removals must report presence correctly, keep the
 * size in sync, preserve tree invariants, and leave the survivors iterable
 * in strictly ascending order.
 */
public void testRemove() {
    final int numValues = scaledRandomIntBetween(200, 1000);

    // Reference model: bit i set <=> value i is still in the tree.
    final FixedBitSet values = new FixedBitSet(numValues);
    values.set(0, numValues);

    IntRedBlackTree tree = new IntRedBlackTree();
    for (int v = 0; v < numValues; ++v) {
        tree.add(v);
    }

    final int iters = scaledRandomIntBetween(300, 1000);
    for (int round = 0; round < iters; ++round) {
        final int target = randomInt(numValues - 1);
        // remove() must report whether the value was actually present.
        final boolean removed = tree.remove(target);
        assertEquals(removed, values.get(target));
        values.clear(target);
        assertEquals(values.cardinality(), tree.size());
        tree.assertConsistent();
    }

    // Surviving values must come back in strictly ascending order and match
    // the reference model exactly, element for element.
    int seen = 0;
    int previousValue = Integer.MIN_VALUE;
    for (IntCursor cursor : tree) {
        ++seen;
        final int current = tree.values[cursor.value];
        assertTrue(previousValue < current);
        assertTrue(values.get(current));
        previousValue = current;
    }
    assertEquals(values.cardinality(), seen);
}