List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS
int NO_MORE_DOCS — the sentinel value (Integer.MAX_VALUE) returned by nextDoc() and advance() once a DocIdSetIterator is exhausted.
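The canonical way to consume any DocIdSetIterator is to keep calling nextDoc() (or advance(target)) until it returns NO_MORE_DOCS. A minimal sketch of that idiom follows; the consume method name and the println body are illustrative placeholders, not part of the examples below.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

public final class DocIdSetIteratorExample {

    // Drain an iterator; the loop ends when the NO_MORE_DOCS sentinel comes back.
    static void consume(DocIdSetIterator iterator) throws IOException {
        int doc;
        while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // process `doc` here (illustrative placeholder)
            System.out.println("matched doc " + doc);
        }
        // After the loop, iterator.docID() also reports NO_MORE_DOCS.
    }
}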
From source file:org.hippoecm.repository.query.lucene.HippoLuceneQueryHits.java
License:Apache License
/**
 * {@inheritDoc}
 */
public ScoreNode nextScoreNode() throws IOException {
    if (scorer == null) {
        return null;
    }
    if (filter == null) {
        return null;
    }
    // Leap-frog the filter and the scorer until both sit on the same document,
    // or either of them is exhausted (NO_MORE_DOCS).
    int filterDocId = filter.nextDoc();
    int scorerDocId = scorer.advance(filterDocId);
    while (true) {
        if (filterDocId == scorerDocId) {
            if (scorerDocId == DocIdSetIterator.NO_MORE_DOCS) {
                break;
            }
            return getScoreNode(scorerDocId);
        } else if (scorerDocId > filterDocId) {
            filterDocId = filter.advance(scorerDocId);
        } else {
            scorerDocId = scorer.advance(filterDocId);
        }
    }
    return null;
}
From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java
License:Apache License
@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);
    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();
    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;
        if (docId > totalMaxDoc) {
            break;
        }
        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            ;
        advancedIterator.advance(docId);
        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
From source file:org.hippoecm.repository.query.lucene.util.SetDocIdSetBuilder.java
License:Apache License
public OpenBitSet toBitSet() throws IOException {
    long start = System.currentTimeMillis();
    final int size = docIdSets.size();
    DocIdSetIterator[] iterators = new DocIdSetIterator[size];
    for (int i = 0; i < size; i++) {
        iterators[i] = docIdSets.get(i).iterator();
        if (iterators[i] == null) {
            return new OpenBitSet();
        }
    }
    OpenBitSet bitSet = new OpenBitSet();
    if (size == 0) {
        return bitSet;
    }
    int currentDoc = -1;
    int currentIter = -1;
    int iterIndex = 0;
    // Round-robin over all iterators; a document is set in the result only when
    // every iterator has advanced to it, i.e. the intersection of all DocIdSets.
    while (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
        if (iterIndex == currentIter) {
            bitSet.set(currentDoc);
            currentDoc = -1;
        }
        int newDoc;
        if (currentDoc == -1) {
            newDoc = iterators[iterIndex].nextDoc();
        } else {
            newDoc = iterators[iterIndex].advance(currentDoc);
        }
        if (newDoc > currentDoc) {
            currentIter = iterIndex;
            currentDoc = newDoc;
        }
        if (++iterIndex == size) {
            iterIndex = 0;
        }
    }
    log.info("Creating OpenBitSet of length '{}' for '{}' DocIdSets took '{}' ms.",
            new String[] { String.valueOf(bitSet.length()), String.valueOf(size),
                    String.valueOf(System.currentTimeMillis() - start) });
    return bitSet;
}
From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java
License:Open Source License
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue =
                    new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
From source file:org.kie.kieora.backend.lucene.setups.BaseLuceneSetup.java
License:Apache License
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
    }

    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final AtomicReader sub = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            if (termsEnum.seekExact(id, false)) {
                final DocsEnum docs = docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0);
                if (docs != null) {
                    // NO_MORE_DOCS here means the term exists but has no live documents in this segment.
                    final int docID = docs.nextDoc();
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += sub.maxDoc();
        }
    }
    return results;
}
From source file:org.meresco.lucene.queries.KeyFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            return new DocIdSetIterator() {
                private int[] keyValuesArray = KeyValuesCache.get(context, keyName);
                private int maxDoc = context.reader().maxDoc();
                int docId;

                @Override
                public int docID() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int nextDoc() throws IOException {
                    if (keyValuesArray != null) {
                        try {
                            while (this.docId < this.maxDoc) {
                                int key = this.keyValuesArray[this.docId];
                                if (keySet.get(key)) {
                                    return this.docId++;
                                }
                                docId++;
                            }
                        } catch (IndexOutOfBoundsException e) {
                        }
                    }
                    // Exhausted: report the NO_MORE_DOCS sentinel from now on.
                    this.docId = DocIdSetIterator.NO_MORE_DOCS;
                    return this.docId;
                }

                @Override
                public int advance(int target) throws IOException {
                    this.docId = target;
                    return nextDoc();
                }

                @Override
                public long cost() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    }, acceptDocs);
}
From source file:org.meresco.lucene.search.MerescoTaxonomyFacetCounts.java
License:Open Source License
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    OrdinalsReader.OrdinalsSegmentReader[] ordsReaders =
            new OrdinalsReader.OrdinalsSegmentReader[this.ordinalsReaders.size()];
    for (MatchingDocs hits : matchingDocs) {
        for (int i = 0; i < ordsReaders.length; i++) {
            ordsReaders[i] = this.ordinalsReaders.get(i).getReader(hits.context);
        }
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (OrdinalsReader.OrdinalsSegmentReader ords : ordsReaders) {
                ords.get(doc, scratch);
                for (int i = 0; i < scratch.length; i++) {
                    values[scratch.ints[scratch.offset + i]]++;
                }
            }
        }
    }
    rollup();
}
From source file:org.meresco.lucene.suggestion.SuggestionNGramKeysFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            return new DocIdSetIterator() {
                private BinaryDocValues keysDocValues = FieldCache.DEFAULT.getTerms(context.reader(), keyName, false);
                private int maxDoc = context.reader().maxDoc();
                int docId;

                @Override
                public int docID() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int nextDoc() throws IOException {
                    while (this.docId < this.maxDoc) {
                        String keys = this.keysDocValues.get(this.docId).utf8ToString();
                        for (String key : keys.split("\\|")) {
                            if (keySet.get(Integer.parseInt(key))) {
                                return this.docId++;
                            }
                        }
                        docId++;
                    }
                    this.docId = DocIdSetIterator.NO_MORE_DOCS;
                    return this.docId;
                }

                @Override
                public int advance(int target) throws IOException {
                    this.docId = target;
                    return nextDoc();
                }

                @Override
                public long cost() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    }, acceptDocs);
}
From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollector.java
License:Open Source License
private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator idIterator = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, idIterator);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = idIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}
From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java
License:Open Source License
@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}