List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS
int NO_MORE_DOCS
To view the source code for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS, click the Source Link.
From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java
License:Apache License
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions, boolean offsets, boolean payloads) throws IOException { docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL); // for each term (iterator next) in this field (field) // iterate over the docs (should only be one) int nextDoc = docsAndPosEnum.nextDoc(); assert nextDoc != DocIdSetIterator.NO_MORE_DOCS; final int freq = docsAndPosEnum.freq(); writeFreq(freq);// w w w .j a va2 s . c om for (int j = 0; j < freq; j++) { int curPos = docsAndPosEnum.nextPosition(); if (positions) { writePosition(curPos); } if (offsets) { writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset()); } if (payloads) { writePayload(docsAndPosEnum.getPayload()); } } nextDoc = docsAndPosEnum.nextDoc(); assert nextDoc == DocIdSetIterator.NO_MORE_DOCS; return docsAndPosEnum; }
From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java
License:Apache License
/** * Converts to a cacheable {@link DocIdSet} * <p/>/* ww w . java2 s.c om*/ * Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution * might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively * always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>. */ public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException { if (set == null || set == EMPTY_DOCIDSET) { return EMPTY_DOCIDSET; } DocIdSetIterator it = set.iterator(); if (it == null) { return EMPTY_DOCIDSET; } int doc = it.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { return EMPTY_DOCIDSET; } if (set instanceof FixedBitSet) { return set; } // TODO: should we use WAH8DocIdSet like Lucene? FixedBitSet fixedBitSet = new FixedBitSet(reader.maxDoc()); do { fixedBitSet.set(doc); doc = it.nextDoc(); } while (doc != DocIdSetIterator.NO_MORE_DOCS); return fixedBitSet; }
From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java
License:Apache License
/** * Creates a {@link FixedBitSet} from an iterator. *///from w ww. j av a 2 s . co m public static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException { FixedBitSet set = new FixedBitSet(numBits); int doc; while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { set.set(doc); } return set; }
From source file:org.elasticsearch.common.lucene.docset.SlicedOpenBitSetTests.java
License:Apache License
/**
 * Sets a single bit in a {@link SlicedOpenBitSet} and checks that it is visible both through
 * {@code fastGet} and through the set's iterator, which must then be exhausted.
 */
@Test
public void simpleTests() throws IOException {
    final int numberOfBits = 500;
    final int words = OpenBitSet.bits2words(numberOfBits);
    // Backing array is 100 words larger than needed; the slice starts at word offset 100.
    final long[] backing = new long[words + 100];
    final SlicedOpenBitSet bitSet = new SlicedOpenBitSet(backing, words, 100);

    bitSet.fastSet(100);
    assertThat(bitSet.fastGet(100), equalTo(true));

    final DocIdSetIterator docs = bitSet.iterator();
    assertThat(docs.nextDoc(), equalTo(100));
    assertThat(docs.nextDoc(), equalTo(DocIdSetIterator.NO_MORE_DOCS));
}
From source file:org.elasticsearch.common.lucene.search.AndDocIdSetTests.java
License:Apache License
public void testDuel() throws IOException { for (int iter = 0; iter < 1000; ++iter) { final int numSets = 1 + random().nextInt(5); final int numDocs = 1 + random().nextInt(1000); FixedBitSet anded = new FixedBitSet(numDocs); anded.set(0, numDocs);//w ww.java2 s . c o m final DocIdSet[] sets = new DocIdSet[numSets]; for (int i = 0; i < numSets; ++i) { final FixedBitSet randomSet = randomBitSet(numDocs); anded.and(randomSet); if (random().nextBoolean()) { // will be considered 'fast' by AndDocIdSet sets[i] = new BitDocIdSet(randomSet); } else { // will be considered 'slow' by AndDocIdSet sets[i] = new DocValuesDocIdSet(numDocs, null) { @Override protected boolean matchDoc(int doc) { return randomSet.get(doc); } }; } } AndDocIdSet andSet = new AndDocIdSet(sets); Bits andBits = andSet.bits(); if (andBits != null) { for (int i = 0; i < numDocs; ++i) { assertEquals(anded.get(i), andBits.get(i)); } } DocIdSetIterator andIt = andSet.iterator(); if (andIt == null) { assertEquals(0, anded.cardinality()); } else { int previous = -1; for (int doc = andIt.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = andIt.nextDoc()) { for (int j = previous + 1; j < doc; ++j) { assertFalse(anded.get(j)); } assertTrue(anded.get(doc)); previous = doc; } for (int j = previous + 1; j < numDocs; ++j) { assertFalse(anded.get(j)); } } } }
From source file:org.elasticsearch.common.lucene.search.function.MinScoreScorerTests.java
License:Apache License
/**
 * Builds a scorer over random documents and scores, wraps it in a {@link MinScoreScorer} with a
 * random minimum score, and walks it with a random mix of {@code nextDoc()} and
 * {@code advance()} calls, checking each returned document against the expected one.
 *
 * @param twoPhase forwarded to the {@code scorer(...)} helper
 */
public void doTestRandom(boolean twoPhase) throws IOException {
    final int maxDoc = TestUtil.nextInt(random(), 10, 10000);
    final int numDocs = TestUtil.nextInt(random(), 1, maxDoc / 2);

    final Set<Integer> picked = new HashSet<>();
    while (picked.size() < numDocs) {
        picked.add(random().nextInt(maxDoc));
    }

    final int[] docs = new int[numDocs];
    int filled = 0;
    for (int pickedDoc : picked) {
        docs[filled] = pickedDoc;
        filled++;
    }
    Arrays.sort(docs);

    final float[] scores = new float[numDocs];
    for (int s = 0; s < numDocs; ++s) {
        scores[s] = random().nextFloat();
    }

    final Scorer scorer = scorer(maxDoc, docs, scores, twoPhase);
    final float minScore = random().nextFloat();
    final Scorer minScoreScorer = new MinScoreScorer(null, scorer, minScore);

    int doc = -1;
    do {
        final int target;
        if (random().nextBoolean()) {
            target = doc + 1;
            doc = minScoreScorer.iterator().nextDoc();
        } else {
            target = doc + TestUtil.nextInt(random(), 1, 10);
            doc = minScoreScorer.iterator().advance(target);
        }

        // First candidate at or after target, then skip those scoring below the minimum.
        final int found = Arrays.binarySearch(docs, target);
        int idx = found >= 0 ? found : -(found + 1);
        while (idx < docs.length && scores[idx] < minScore) {
            idx++;
        }

        if (idx == docs.length) {
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, doc);
        } else {
            assertEquals(docs[idx], doc);
            assertEquals(scores[idx], scorer.score(), 0f);
        }
    } while (doc != DocIdSetIterator.NO_MORE_DOCS);
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/**
 * Computes the {@link DocIdSet} for one reader context by combining the doc id sets of all
 * added filter clauses according to their MUST / SHOULD / MUST_NOT occurrence.
 * <p>
 * Clauses whose set is not a "fast iterator" (per {@code DocIdSets.isFastIterator}) are applied
 * through their {@link Bits} in a second pass; all other clauses are applied through their
 * iterators first. When should clauses are combined with must or must_not clauses, a document
 * only stays in the result if at least one should clause matches it.
 *
 * @param context    the reader context to evaluate the clauses against
 * @param acceptDocs passed through unchanged to each clause's filter
 * @return the combined set, or {@code null} when a MUST clause is empty or when should clauses
 *         exist but none of them contributes a match
 * @throws IOException if evaluating a clause or iterating a set fails
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    FixedBitSet res = null;
    final AtomicReader reader = context.reader();

    // optimize single case...
    if (clauses.size() == 1) {
        FilterClause clause = clauses.get(0);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST_NOT) {
            // A lone must_not is the complement of its set (everything when the set is empty).
            if (DocIdSets.isEmpty(set)) {
                return new AllDocIdSet(reader.maxDoc());
            } else {
                return new NotDocIdSet(set, reader.maxDoc());
            }
        }
        // SHOULD or MUST, just return the set...
        if (DocIdSets.isEmpty(set)) {
            return null;
        }
        return set;
    }

    // first, go over and see if we can shortcut the execution
    // and gather Bits if we need to
    List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
    boolean hasShouldClauses = false;
    boolean hasNonEmptyShouldClause = false;
    boolean hasMustClauses = false;
    boolean hasMustNotClauses = false;
    for (int i = 0; i < clauses.size(); i++) {
        FilterClause clause = clauses.get(i);
        DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
        if (clause.getOccur() == Occur.MUST) {
            hasMustClauses = true;
            // An empty MUST clause means nothing can match.
            if (DocIdSets.isEmpty(set)) {
                return null;
            }
        } else if (clause.getOccur() == Occur.SHOULD) {
            hasShouldClauses = true;
            // Empty should clauses are dropped; they are not added to results.
            if (DocIdSets.isEmpty(set)) {
                continue;
            }
            hasNonEmptyShouldClause = true;
        } else if (clause.getOccur() == Occur.MUST_NOT) {
            hasMustNotClauses = true;
            if (DocIdSets.isEmpty(set)) {
                // we mark empty ones as null for must_not, handle it in the next run...
                results.add(new ResultClause(null, null, clause));
                continue;
            }
        }
        // Only sets that are not fast iterators get a Bits view; a non-null bits field is
        // what later marks a clause as "slow".
        Bits bits = null;
        if (!DocIdSets.isFastIterator(set)) {
            bits = set.bits();
        }
        results.add(new ResultClause(set, bits, clause));
    }

    // Should clauses were present but every one of them was empty.
    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    }

    // now, go over the clauses and apply the "fast" ones first...
    // The flag is reset here and re-derived below: it is set again when a should clause yields
    // an iterator or matches a document that is still in res.
    hasNonEmptyShouldClause = false;
    boolean hasBits = false;
    // But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
    // that don't match with a must or must_not clause.
    List<ResultClause> fastOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Deferred: checked per document once must / must_not have shaped res.
                fastOrClauses.add(clause);
            } else if (res == null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                }
            } else {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    hasNonEmptyShouldClause = true;
                    res.or(it);
                }
            }
        }
    }

    // Now we safely handle the "fast" must and must_not clauses.
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // bits-based (slow) clauses are applied in the second run
        if (clause.bits != null) {
            hasBits = true;
            continue;
        }
        if (clause.clause.getOccur() == Occur.MUST) {
            DocIdSetIterator it = clause.docIdSet.iterator();
            if (it == null) {
                return null;
            }
            if (res == null) {
                // First contribution: seed res with this clause's docs.
                res = new FixedBitSet(reader.maxDoc());
                res.or(it);
            } else {
                res.and(it);
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                // Nothing collected yet: start from "all docs" and subtract.
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
            }
            // docIdSet is null for must_not clauses that were recorded as empty above.
            if (clause.docIdSet != null) {
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            }
        }
    }

    // No slow clauses at all: finish using iterators only.
    if (!hasBits) {
        if (!fastOrClauses.isEmpty()) {
            // Keep only docs in res that at least one deferred should clause matches.
            DocIdSetIterator it = res.iterator();
            at_least_one_should_clause_iter: for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
                for (ResultClause fastOrClause : fastOrClauses) {
                    DocIdSetIterator clauseIterator = fastOrClause.iterator();
                    if (clauseIterator == null) {
                        continue;
                    }
                    if (iteratorMatch(clauseIterator, setDoc)) {
                        hasNonEmptyShouldClause = true;
                        continue at_least_one_should_clause_iter;
                    }
                }
                // No should clause matched this doc.
                res.clear(setDoc);
            }
        }

        if (hasShouldClauses && !hasNonEmptyShouldClause) {
            return null;
        } else {
            return res;
        }
    }

    // we have some clauses with bits, apply them...
    // we let the "res" drive the computation, and check Bits for that
    List<ResultClause> slowOrClauses = new ArrayList<ResultClause>();
    for (int i = 0; i < results.size(); i++) {
        ResultClause clause = results.get(i);
        // Fast clauses were already applied in the passes above.
        if (clause.bits == null) {
            continue;
        }
        if (clause.clause.getOccur() == Occur.SHOULD) {
            if (hasMustClauses || hasMustNotClauses) {
                // Deferred to the per-document should check at the end.
                slowOrClauses.add(clause);
            } else {
                if (res == null) {
                    DocIdSetIterator it = clause.docIdSet.iterator();
                    if (it == null) {
                        continue;
                    }
                    hasNonEmptyShouldClause = true;
                    res = new FixedBitSet(reader.maxDoc());
                    res.or(it);
                } else {
                    // OR the clause in by probing its Bits for every doc not yet in res.
                    for (int doc = 0; doc < reader.maxDoc(); doc++) {
                        if (!res.get(doc) && clause.bits.get(doc)) {
                            hasNonEmptyShouldClause = true;
                            res.set(doc);
                        }
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST) {
            if (res == null) {
                // nothing we can do, just or it...
                res = new FixedBitSet(reader.maxDoc());
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it == null) {
                    return null;
                }
                res.or(it);
            } else {
                Bits bits = clause.bits;
                // use the "res" to drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (!bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        } else if (clause.clause.getOccur() == Occur.MUST_NOT) {
            if (res == null) {
                res = new FixedBitSet(reader.maxDoc());
                res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
                DocIdSetIterator it = clause.docIdSet.iterator();
                if (it != null) {
                    res.andNot(it);
                }
            } else {
                Bits bits = clause.bits;
                // let res drive the iteration
                DocIdSetIterator it = res.iterator();
                for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                    if (bits.get(doc)) {
                        res.clear(doc);
                    }
                }
            }
        }
    }

    // From a boolean-logic point of view a should clause has no impact on a bool filter if there
    // is already a must or must_not clause. However, in the current ES bool filter behaviour at least one should
    // clause must match in order for a doc to be a match. What we do here is check whether the matched docs match
    // any should filter.
    // TODO: Add an option to disable the minimum_should_match=1 behaviour
    if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
        DocIdSetIterator it = res.iterator();
        at_least_one_should_clause_iter: for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
            // Try the iterator-based should clauses first...
            for (ResultClause fastOrClause : fastOrClauses) {
                DocIdSetIterator clauseIterator = fastOrClause.iterator();
                if (clauseIterator == null) {
                    continue;
                }
                if (iteratorMatch(clauseIterator, setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // ...then the Bits-based ones.
            for (ResultClause slowOrClause : slowOrClauses) {
                if (slowOrClause.bits.get(setDoc)) {
                    hasNonEmptyShouldClause = true;
                    continue at_least_one_should_clause_iter;
                }
            }
            // No should clause matched this doc.
            res.clear(setDoc);
        }
    }

    if (hasShouldClauses && !hasNonEmptyShouldClause) {
        return null;
    } else {
        return res;
    }
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilter.java
License:Apache License
/**
 * Tells whether {@code docIdSetIterator} matches {@code target}, advancing the iterator to
 * {@code target} when it is currently positioned before it.
 *
 * @param docIdSetIterator the iterator to probe; must not be {@code null}
 * @param target           the document id to look for
 * @return {@code true} if the iterator is on, or advances exactly to, {@code target};
 *         {@code false} if it is exhausted or already past {@code target}
 * @throws IOException if advancing the iterator fails
 */
static boolean iteratorMatch(DocIdSetIterator docIdSetIterator, int target) throws IOException {
    assert docIdSetIterator != null;
    final int position = docIdSetIterator.docID();
    // Exhausted, or already beyond the target: the iterator is left untouched.
    if (position == DocIdSetIterator.NO_MORE_DOCS || position > target) {
        return false;
    }
    return position == target || docIdSetIterator.advance(target) == target;
}
From source file:org.elasticsearch.common.lucene.search.XBooleanFilterLuceneTests.java
License:Apache License
private void tstFilterCard(String mes, int expected, Filter filt) throws Exception { int actual = 0; DocIdSet docIdSet = filt.getDocIdSet(reader.getContext(), reader.getLiveDocs()); if (docIdSet != null) { DocIdSetIterator disi = docIdSet.iterator(); if (disi != null) { while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { actual++;//www . jav a2 s.co m } } } assertThat(mes, actual, equalTo(expected)); }
From source file:org.elasticsearch.common.lucene.search.XBooleanFilterTests.java
License:Apache License
@Test public void testRandom() throws IOException { int iterations = atLeast(400); // don't worry that is fast! for (int iter = 0; iter < iterations; iter++) { int numClauses = 1 + random().nextInt(10); FilterClause[] clauses = new FilterClause[numClauses]; BooleanQuery topLevel = new BooleanQuery(); BooleanQuery orQuery = new BooleanQuery(); boolean hasMust = false; boolean hasShould = false; boolean hasMustNot = false; for (int i = 0; i < numClauses; i++) { int field = random().nextInt(5); char value = distinctValues[random().nextInt(distinctValues.length)]; switch (random().nextInt(10)) { case 9: case 8: case 7: case 6: case 5: hasMust = true;/*from ww w . j a v a 2s. c o m*/ if (rarely()) { clauses[i] = new FilterClause(new EmptyFilter(), MUST); topLevel.add(new BooleanClause(new MatchNoDocsQuery(), MUST)); } else { clauses[i] = newFilterClause(field, value, MUST, random().nextBoolean()); topLevel.add(new BooleanClause( new TermQuery(new Term(String.valueOf(field), String.valueOf(value))), MUST)); } break; case 4: case 3: case 2: case 1: hasShould = true; if (rarely()) { clauses[i] = new FilterClause(new EmptyFilter(), SHOULD); orQuery.add(new BooleanClause(new MatchNoDocsQuery(), SHOULD)); } else { clauses[i] = newFilterClause(field, value, SHOULD, random().nextBoolean()); orQuery.add(new BooleanClause( new TermQuery(new Term(String.valueOf(field), String.valueOf(value))), SHOULD)); } break; case 0: hasMustNot = true; if (rarely()) { clauses[i] = new FilterClause(new EmptyFilter(), MUST_NOT); topLevel.add(new BooleanClause(new MatchNoDocsQuery(), MUST_NOT)); } else { clauses[i] = newFilterClause(field, value, MUST_NOT, random().nextBoolean()); topLevel.add(new BooleanClause( new TermQuery(new Term(String.valueOf(field), String.valueOf(value))), MUST_NOT)); } break; } } if (orQuery.getClauses().length > 0) { topLevel.add(new BooleanClause(orQuery, MUST)); } if (hasMustNot && !hasMust && !hasShould) { // pure negative topLevel.add(new BooleanClause(new 
MatchAllDocsQuery(), MUST)); } XBooleanFilter booleanFilter = createBooleanFilter(clauses); FixedBitSet leftResult = new FixedBitSet(reader.maxDoc()); FixedBitSet rightResult = new FixedBitSet(reader.maxDoc()); DocIdSet left = booleanFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()); DocIdSet right = new QueryWrapperFilter(topLevel).getDocIdSet(reader.getContext(), reader.getLiveDocs()); if (left == null || right == null) { if (left == null && right != null) { assertThat(errorMsg(clauses, topLevel), (right.iterator() == null ? DocIdSetIterator.NO_MORE_DOCS : right.iterator().nextDoc()), equalTo(DocIdSetIterator.NO_MORE_DOCS)); } if (left != null && right == null) { assertThat(errorMsg(clauses, topLevel), (left.iterator() == null ? DocIdSetIterator.NO_MORE_DOCS : left.iterator().nextDoc()), equalTo(DocIdSetIterator.NO_MORE_DOCS)); } } else { DocIdSetIterator leftIter = left.iterator(); DocIdSetIterator rightIter = right.iterator(); if (leftIter != null) { leftResult.or(leftIter); } if (rightIter != null) { rightResult.or(rightIter); } assertThat(leftResult.cardinality(), equalTo(rightResult.cardinality())); for (int i = 0; i < reader.maxDoc(); i++) { assertThat(errorMsg(clauses, topLevel) + " -- failed at index " + i, leftResult.get(i), equalTo(rightResult.get(i))); } } } }