Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

On this page you can find example usages of org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS.

Prototype

public static final int NO_MORE_DOCS = Integer.MAX_VALUE;

Document

When returned by #nextDoc(), #advance(int) and #docID(), it means there are no more docs in the iterator.
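In practice the constant serves as the loop sentinel when exhausting an iterator. A minimal sketch (assuming an existing org.apache.lucene.search.DocIdSet named docIdSet, in a context that may throw IOException):

// Iterate every document in the set until the sentinel value is returned.
DocIdSetIterator it = docIdSet.iterator();
int doc;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // process doc
}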

Usage

From source file: org.hippoecm.repository.query.lucene.HippoLuceneQueryHits.java

License: Apache License

/**
 * {@inheritDoc}
 */
public ScoreNode nextScoreNode() throws IOException {
    if (scorer == null) {
        return null;
    }
    if (filter == null) {
        return null;
    }

    int filterDocId = filter.nextDoc();
    int scorerDocId = scorer.advance(filterDocId);

    while (true) {
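        // Leapfrog: keep advancing whichever iterator is behind until the filter
        // and the scorer land on the same doc, or both are exhausted (NO_MORE_DOCS).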
        if (filterDocId == scorerDocId) {
            if (scorerDocId == DocIdSetIterator.NO_MORE_DOCS) {
                break;
            }
            return getScoreNode(scorerDocId);
        } else if (scorerDocId > filterDocId) {
            filterDocId = filter.advance(scorerDocId);
        } else {
            scorerDocId = scorer.advance(filterDocId);
        }
    }

    return null;
}

From source file: org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java

License: Apache License

@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);

    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();

    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;

        if (docId > totalMaxDoc) {
            break;
        }

        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            ;

        advancedIterator.advance(docId);

        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}

From source file: org.hippoecm.repository.query.lucene.util.SetDocIdSetBuilder.java

License: Apache License

public OpenBitSet toBitSet() throws IOException {
    long start = System.currentTimeMillis();
    final int size = docIdSets.size();
    DocIdSetIterator[] iterators = new DocIdSetIterator[size];
    for (int i = 0; i < size; i++) {
        iterators[i] = docIdSets.get(i).iterator();
        if (iterators[i] == null) {
            return new OpenBitSet();
        }
    }

    OpenBitSet bitSet = new OpenBitSet();
    if (size == 0) {
        return bitSet;
    }

    int currentDoc = -1;
    int currentIter = -1;
    int iterIndex = 0;
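    // A doc id makes it into the bit set only when every iterator has matched it:
    // once iterIndex wraps back around to the iterator that proposed currentDoc,
    // all the others have accepted that doc, so it belongs to the intersection.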
    while (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
        if (iterIndex == currentIter) {
            bitSet.set(currentDoc);
            currentDoc = -1;
        }

        int newDoc;
        if (currentDoc == -1) {
            newDoc = iterators[iterIndex].nextDoc();
        } else {
            newDoc = iterators[iterIndex].advance(currentDoc);
        }

        if (newDoc > currentDoc) {
            currentIter = iterIndex;
            currentDoc = newDoc;
        }
        if (++iterIndex == size) {
            iterIndex = 0;
        }
    }
    log.info("Creating OpenBitSet of lenght '{}' for '{}' DocIdSet's took '{}' ms.",
            new String[] { String.valueOf(bitSet.length()), String.valueOf(size),
                    String.valueOf(System.currentTimeMillis() - start) });
    return bitSet;
}

From source file: org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License: Open Source License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>. The field must have at most one indexed
 * token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.
    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}

From source file: org.kie.kieora.backend.lucene.setups.BaseLuceneSetup.java

License: Apache License

protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
    }

    int[] results = new int[ids.length];

    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final AtomicReader sub = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            if (termsEnum.seekExact(id, false)) {
                final DocsEnum docs = docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX],
                        0);
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += sub.maxDoc();
        }
    }

    return results;
}

From source file: org.meresco.lucene.queries.KeyFilter.java

License: Open Source License

@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            return new DocIdSetIterator() {
                private int[] keyValuesArray = KeyValuesCache.get(context, keyName);
                private int maxDoc = context.reader().maxDoc();
                int docId;

                @Override
                public int docID() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int nextDoc() throws IOException {
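                    // Scan forward from the current docId and return the first doc
                    // whose key value is in keySet; when the segment is exhausted,
                    // park the iterator on NO_MORE_DOCS.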
                    if (keyValuesArray != null) {
                        try {
                            while (this.docId < this.maxDoc) {
                                int key = this.keyValuesArray[this.docId];
                                if (keySet.get(key)) {
                                    return this.docId++;
                                }
                                docId++;
                            }
                        } catch (IndexOutOfBoundsException e) {
                        }
                    }
                    this.docId = DocIdSetIterator.NO_MORE_DOCS;
                    return this.docId;
                }

                @Override
                public int advance(int target) throws IOException {
                    this.docId = target;
                    return nextDoc();
                }

                @Override
                public long cost() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    }, acceptDocs);
}

From source file: org.meresco.lucene.search.MerescoTaxonomyFacetCounts.java

License: Open Source License

private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    OrdinalsReader.OrdinalsSegmentReader[] ordsReaders = new OrdinalsReader.OrdinalsSegmentReader[this.ordinalsReaders
            .size()];
    for (MatchingDocs hits : matchingDocs) {
        for (int i = 0; i < ordsReaders.length; i++) {
            ordsReaders[i] = this.ordinalsReaders.get(i).getReader(hits.context);
        }
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
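        // For every matching doc, read its facet ordinals and increment the
        // per-ordinal counters until the iterator returns NO_MORE_DOCS.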
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (OrdinalsReader.OrdinalsSegmentReader ords : ordsReaders) {
                ords.get(doc, scratch);
                for (int i = 0; i < scratch.length; i++) {
                    values[scratch.ints[scratch.offset + i]]++;
                }
            }
        }
    }

    rollup();
}

From source file: org.meresco.lucene.suggestion.SuggestionNGramKeysFilter.java

License: Open Source License

@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            return new DocIdSetIterator() {
                private BinaryDocValues keysDocValues = FieldCache.DEFAULT.getTerms(context.reader(), keyName,
                        false);
                private int maxDoc = context.reader().maxDoc();
                int docId;

                @Override
                public int docID() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int nextDoc() throws IOException {
                    while (this.docId < this.maxDoc) {
                        String keys = this.keysDocValues.get(this.docId).utf8ToString();
                        for (String key : keys.split("\\|")) {
                            if (keySet.get(Integer.parseInt(key))) {
                                return this.docId++;
                            }
                        }
                        docId++;
                    }
                    this.docId = DocIdSetIterator.NO_MORE_DOCS;
                    return this.docId;
                }

                @Override
                public int advance(int target) throws IOException {
                    this.docId = target;
                    return nextDoc();
                }

                @Override
                public long cost() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    }, acceptDocs);
}

From source file: org.neo4j.kernel.api.impl.index.collector.DocValuesCollector.java

License: Open Source License

private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator idIterator = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, idIterator);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = idIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}

From source file: org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java

License: Open Source License

@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}