Example usage for org.apache.lucene.search DocIdSetIterator nextDoc

List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc

Introduction

On this page you can find example usage for org.apache.lucene.search DocIdSetIterator nextDoc.

Prototype

public abstract int nextDoc() throws IOException;

Document

Advances to the next document in the set and returns the doc it is currently on, or NO_MORE_DOCS if there are no more docs in the set.
NOTE: after the iterator has been exhausted you should not call this method, as it may result in unpredictable behavior.
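
The canonical consumption pattern follows from this contract: call nextDoc() in a loop until it returns NO_MORE_DOCS, and never call it again afterwards. A minimal sketch (the consumeAll name is illustrative, not part of the Lucene API):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

static void consumeAll(DocIdSetIterator iterator) throws IOException {
    int doc;
    // nextDoc() advances and returns the new current doc id,
    // or NO_MORE_DOCS once the set is exhausted
    while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // process doc here
    }
    // do not call nextDoc() again after NO_MORE_DOCS has been returned
}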

Usage

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java

License:Open Source License

static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits, LeafCollector collector,
        Bits acceptDocs) throws IOException {
    // ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest cardinality it should
    // be used first:
    DocIdSetIterator iterator = ConjunctionDISI.intersectIterators(
            Arrays.asList(new BitSetIterator(roleBits, roleBits.approximateCardinality()), scorer.iterator()));
    for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
        if (acceptDocs == null || acceptDocs.get(docId)) {
            collector.collect(docId);
        }
    }
}
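
Note the loop condition: docId < DocIdSetIterator.NO_MORE_DOCS is equivalent to the more common != comparison, because NO_MORE_DOCS is defined as Integer.MAX_VALUE and no valid doc id can reach it.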

From source file:org.hibernate.search.filter.impl.AndDocIdSet.java

License:Open Source License

private int findFirstTargetPosition(final DocIdSetIterator[] iterators, OpenBitSet result) throws IOException {
    int targetPosition = iterators[0].nextDoc();
    if (targetPosition == DocIdSetIterator.NO_MORE_DOCS) {
        // first iterator has no values, so skip all
        return DocIdSetIterator.NO_MORE_DOCS;
    }

    boolean allIteratorsShareSameFirstTarget = true;

    // initialize the iterators: exactly one nextDoc() call for each DocIdSetIterator
    for (int i = 1; i < iterators.length; i++) {
        final DocIdSetIterator iterator = iterators[i];
        final int position = iterator.nextDoc();
        if (position == DocIdSetIterator.NO_MORE_DOCS) {
            //current iterator has no values, so skip all
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        if (targetPosition != position) {
            targetPosition = max(targetPosition, position);
            allIteratorsShareSameFirstTarget = false;
        }
    }
    // end of iterator initialization

    if (allIteratorsShareSameFirstTarget) {
        result.fastSet(targetPosition);
        targetPosition++;
    }

    return targetPosition;
}
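
This initialization advances every iterator exactly once. If they all land on the same doc id, that id can be set in the result bit set immediately; otherwise the maximum of the first positions becomes the target from which the conjunction proceeds.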

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

@Test
public void testIteratorMatchesTestArray() throws IOException {
    DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
    DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
    assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, docIdSetIterator.docID());
    assertEquals(9, docIdSetIterator.advance(9));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
}
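
This test also illustrates the advance(target) contract: it moves to the first document whose id is greater than or equal to target, so advance(9) lands on 9, while advance(10) finds nothing left in a set of ids 0 through 9 and returns NO_MORE_DOCS.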

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

private static void iterateOnResults(DocIdSet docIdBitSet) throws IOException {
    DocIdSetIterator iterator = docIdBitSet.iterator();
    int currentDoc;
    do {
        currentDoc = iterator.nextDoc();
    } while (currentDoc != DocIdSetIterator.NO_MORE_DOCS);
}
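
A do/while like this simply drains the iterator; it works because nextDoc() reports NO_MORE_DOCS exactly once at exhaustion, after which the loop exits without calling it again.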

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

/**
 * @param expected the doc id set as expected
 * @param actual the doc id set as returned by the test
 *
 * @return true if the two DocIdSets are equal: they contain the same ids, in the same order, and all are equal
 */
public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
    try {
        DocIdSetIterator iterA = expected.iterator();
        DocIdSetIterator iterB = actual.iterator();
        int nextA;
        int nextB;
        do {
            nextA = iterA.nextDoc();
            nextB = iterB.nextDoc();
            if (nextA != nextB) {
                return false;
            }
            assertEquals(iterA.docID(), iterB.docID());
        } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
    } catch (IOException ioe) {
        fail("these DocIdSetIterator instances should not throw any exceptions");
    }
    return true;
}
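
The two iterators are advanced in lockstep; the first position where their doc ids disagree proves the sets differ, and the loop ends once both report NO_MORE_DOCS in the same step.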

From source file:org.hibernate.search.test.filter.FiltersOptimizationTest.java

License:Open Source License

/**
 * Verifies that the docIdSet represents a specific
 * sequence of docIds.
 * @param docIdSet the docIdSet to test
 * @param expectedIds an array of document ids
 * @return true if iterating on docIdSet returns the expectedIds
 * @throws IOException should not happen
 */
private boolean isIdSetSequenceSameTo(DocIdSet docIdSet, int... expectedIds) throws IOException {
    DocIdSetIterator idSetIterator = docIdSet.iterator();
    for (int setBit : expectedIds) {
        int currentId = idSetIterator.nextDoc();
        if (currentId == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        if (currentId != setBit) {
            return false;
        }
    }
    // and now test both sequences are at the end:
    return idSetIterator.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
}
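
The final nextDoc() call is what makes the check exact: after every expected id has matched, the iterator must also be exhausted, otherwise the set contains additional documents beyond expectedIds.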

From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java

License:Apache License

@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);

    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();

    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;

        if (docId > totalMaxDoc) {
            break;
        }

        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // intentionally empty: step the simple iterator until it reaches or passes docId
        }

        advancedIterator.advance(docId);

        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
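
The lockstep comparison encodes the relationship between the two methods: after advance(docId), the iterator must sit on the same document that repeated nextDoc() calls reach, namely the first doc id greater than or equal to docId (or NO_MORE_DOCS past the end).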

From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License:Open Source License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>. The field must have at most one indexed
 * token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.
    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
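
For the purposes of this page, the relevant part is the counting loop: nextDoc() visits each matching document once, the document's term ordinal is read from the FieldCache string index via docID(), and the corresponding slot in the counts array is incremented; sorting and paging then operate on those accumulated counts.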

From source file:org.meresco.lucene.search.MerescoTaxonomyFacetCounts.java

License:Open Source License

private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    OrdinalsReader.OrdinalsSegmentReader[] ordsReaders = new OrdinalsReader.OrdinalsSegmentReader[this.ordinalsReaders
            .size()];
    for (MatchingDocs hits : matchingDocs) {
        for (int i = 0; i < ordsReaders.length; i++) {
            ordsReaders[i] = this.ordinalsReaders.get(i).getReader(hits.context);
        }
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (OrdinalsReader.OrdinalsSegmentReader ords : ordsReaders) {
                ords.get(doc, scratch);
                for (int i = 0; i < scratch.length; i++) {
                    values[scratch.ints[scratch.offset + i]]++;
                }
            }
        }
    }

    rollup();
}
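
Here nextDoc() drives a per-segment counting pass: for each matching document, the ordinals of that document are read into the scratch buffer and each referenced facet value counter is incremented.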

From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollector.java

License:Open Source License

private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator idIterator = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, idIterator);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = idIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}
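
Replay re-iterates the stored doc id set: a scorer (replaying cached scores, or a constant score wrapping the iterator) is attached to the leaf collector first, and then every id produced by nextDoc() is passed to collect(), reproducing the original collection.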