Example usage for org.apache.lucene.search DocIdSetIterator nextDoc

List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc

Introduction

On this page you can find example usage for org.apache.lucene.search DocIdSetIterator nextDoc.

Prototype

public abstract int nextDoc() throws IOException;

Document

Advances to the next document in the set and returns the doc it is currently on, or NO_MORE_DOCS if there are no more docs in the set.
NOTE: after the iterator has been exhausted you should not call this method, as it may result in unpredictable behavior.
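
The canonical consumption pattern follows from this contract: call nextDoc() in a loop until it returns NO_MORE_DOCS, and never call it again afterwards. A minimal sketch (the consumeAll name is illustrative, not part of the Lucene API):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

static void consumeAll(DocIdSetIterator iterator) throws IOException {
    int doc;
    // nextDoc() advances and returns the new current doc id,
    // or NO_MORE_DOCS once the set is exhausted
    while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // process doc here
    }
    // do not call nextDoc() again after NO_MORE_DOCS has been returned
}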

Usage

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java

License:Open Source License

static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits, LeafCollector collector,
        Bits acceptDocs) throws IOException {
    // ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest cardinality it should
    // be used first:
    DocIdSetIterator iterator = ConjunctionDISI.intersectIterators(
            Arrays.asList(new BitSetIterator(roleBits, roleBits.approximateCardinality()), scorer.iterator()));
    for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
        if (acceptDocs == null || acceptDocs.get(docId)) {
            collector.collect(docId);
        }
    }
}
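
Note the loop condition: docId < DocIdSetIterator.NO_MORE_DOCS is equivalent to the more common != comparison, because NO_MORE_DOCS is defined as Integer.MAX_VALUE and no valid doc id can reach it.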

From source file:org.hibernate.search.filter.impl.AndDocIdSet.java

License:Open Source License

private int findFirstTargetPosition(final DocIdSetIterator[] iterators, OpenBitSet result) throws IOException {
    int targetPosition = iterators[0].nextDoc();
    if (targetPosition == DocIdSetIterator.NO_MORE_DOCS) {
        // first iterator has no values, so skip all
        return DocIdSetIterator.NO_MORE_DOCS;
    }

    boolean allIteratorsShareSameFirstTarget = true;

    // initialize the iterators: exactly one nextDoc() call for each DocIdSetIterator
    for (int i = 1; i < iterators.length; i++) {
        final DocIdSetIterator iterator = iterators[i];
        final int position = iterator.nextDoc();
        if (position == DocIdSetIterator.NO_MORE_DOCS) {
            //current iterator has no values, so skip all
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        if (targetPosition != position) {
            targetPosition = max(targetPosition, position);
            allIteratorsShareSameFirstTarget = false;
        }
    }
    // end of iterator initialization

    if (allIteratorsShareSameFirstTarget) {
        result.fastSet(targetPosition);
        targetPosition++;
    }

    return targetPosition;
}
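
This initialization advances every iterator exactly once. If they all land on the same doc id, that id can be set in the result bit set immediately; otherwise the maximum of the first positions becomes the target from which the conjunction proceeds.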

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

@Test
public void testIteratorMatchesTestArray() throws IOException {
    DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
    DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
    assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, docIdSetIterator.docID());
    assertEquals(9, docIdSetIterator.advance(9));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
}
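
This test also illustrates the advance(target) contract: it moves to the first document whose id is greater than or equal to target, so advance(9) lands on 9, while advance(10) finds nothing left in a set of ids 0 through 9 and returns NO_MORE_DOCS.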

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

private static void iterateOnResults(DocIdSet docIdBitSet) throws IOException {
    DocIdSetIterator iterator = docIdBitSet.iterator();
    int currentDoc;
    do {
        currentDoc = iterator.nextDoc();
    } while (currentDoc != DocIdSetIterator.NO_MORE_DOCS);
}
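
A do/while like this simply drains the iterator; it works because nextDoc() reports NO_MORE_DOCS exactly once at exhaustion, after which the loop exits without calling it again.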

From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java

License:Open Source License

/**
 * @param expected the doc id set as expected
 * @param actual the doc id set as returned by the test
 *
 * @return true if the two DocIdSets are equal: they contain the same ids, in the same order, and all are equal
 */
public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
    try {
        DocIdSetIterator iterA = expected.iterator();
        DocIdSetIterator iterB = actual.iterator();
        int nextA;
        int nextB;
        do {
            nextA = iterA.nextDoc();
            nextB = iterB.nextDoc();
            if (nextA != nextB) {
                return false;
            }
            assertEquals(iterA.docID(), iterB.docID());
        } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
    } catch (IOException ioe) {
        fail("these DocIdSetIterator instances should not throw any exceptions");
    }
    return true;
}
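
The two iterators are advanced in lockstep; the first position where their doc ids disagree proves the sets differ, and the loop ends once both report NO_MORE_DOCS in the same step.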

From source file:org.hibernate.search.test.filter.FiltersOptimizationTest.java

License:Open Source License

/**
 * Verifies that the docIdSet represents a specific
 * sequence of docIds.
 * @param docIdSet the docIdSet to test
 * @param expectedIds an array of document ids
 * @return true if iterating on docIdSet returns the expectedIds
 * @throws IOException should not happen
 */
private boolean isIdSetSequenceSameTo(DocIdSet docIdSet, int... expectedIds) throws IOException {
    DocIdSetIterator idSetIterator = docIdSet.iterator();
    for (int setBit : expectedIds) {
        int currentId = idSetIterator.nextDoc();
        if (currentId == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        if (currentId != setBit) {
            return false;
        }
    }
    // and now test both sequences are at the end:
    return idSetIterator.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
}
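
The final nextDoc() call is what makes the check exact: after every expected id has matched, the iterator must also be exhausted, otherwise the set contains additional documents beyond expectedIds.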

From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java

License:Apache License

@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);

    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();

    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;

        if (docId > totalMaxDoc) {
            break;
        }

        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // intentionally empty: step the simple iterator until it reaches or passes docId
        }

        advancedIterator.advance(docId);

        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
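
The lockstep comparison encodes the relationship between the two methods: after advance(docId), the iterator must sit on the same document that repeated nextDoc() calls reach, namely the first doc id greater than or equal to docId (or NO_MORE_DOCS past the end).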

From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License:Open Source License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>. The field must have at most one indexed
 * token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.
    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
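
For the purposes of this page, the relevant part is the counting loop: nextDoc() visits each matching document once, the document's term ordinal is read from the FieldCache string index via docID(), and the corresponding slot in the counts array is incremented; sorting and paging then operate on those accumulated counts.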

From source file:org.meresco.lucene.search.MerescoTaxonomyFacetCounts.java

License:Open Source License

private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    OrdinalsReader.OrdinalsSegmentReader[] ordsReaders = new OrdinalsReader.OrdinalsSegmentReader[this.ordinalsReaders
            .size()];
    for (MatchingDocs hits : matchingDocs) {
        for (int i = 0; i < ordsReaders.length; i++) {
            ordsReaders[i] = this.ordinalsReaders.get(i).getReader(hits.context);
        }
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (OrdinalsReader.OrdinalsSegmentReader ords : ordsReaders) {
                ords.get(doc, scratch);
                for (int i = 0; i < scratch.length; i++) {
                    values[scratch.ints[scratch.offset + i]]++;
                }
            }
        }
    }

    rollup();
}
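
Here nextDoc() drives a per-segment counting pass: for each matching document, the ordinals of that document are read into the scratch buffer and each referenced facet value counter is incremented.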

From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollector.java

License:Open Source License

private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator idIterator = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, idIterator);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = idIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}
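
Replay re-iterates the stored doc id set: a scorer (replaying cached scores, or a constant score wrapping the iterator) is attached to the leaf collector first, and then every id produced by nextDoc() is passed to collect(), reproducing the original collection.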