List of usage examples for org.apache.lucene.search DocIdSetIterator docID
public abstract int docID();
Returns -1 if #nextDoc() or #advance(int) were not called yet, NO_MORE_DOCS if the iterator has exhausted, and otherwise the doc ID the iterator is currently positioned on.
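A minimal sketch of the typical consumption pattern, for orientation before the examples below (the docIdSet and process names are hypothetical, not taken from any of the source files listed here): advance the iterator with nextDoc() or advance(int) and read the current document via docID().
// Minimal sketch (hypothetical names): walk a DocIdSetIterator and read docID() at each position.
DocIdSetIterator it = docIdSet.iterator();          // docIdSet is some org.apache.lucene.search.DocIdSet
assert it.docID() == -1;                            // not positioned yet
int doc;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // doc == it.docID(): the document the iterator is currently on
    process(doc);                                   // hypothetical consumer
}
assert it.docID() == DocIdSetIterator.NO_MORE_DOCS; // exhausted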
From source file:org.elasticsearch.search.profile.ProfileScorer.java
License:Apache License
@Override
public TwoPhaseIterator twoPhaseIterator() {
    final TwoPhaseIterator in = scorer.twoPhaseIterator();
    if (in == null) {
        return null;
    }
    final DocIdSetIterator inApproximation = in.approximation();
    final DocIdSetIterator approximation = new DocIdSetIterator() {
        @Override
        public int advance(int target) throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.ADVANCE);
            try {
                return inApproximation.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.NEXT_DOC);
            try {
                return inApproximation.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int docID() {
            return inApproximation.docID();
        }

        @Override
        public long cost() {
            return inApproximation.cost();
        }
    };
    return new TwoPhaseIterator(approximation) {
        @Override
        public boolean matches() throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.MATCH);
            try {
                return in.matches();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public float matchCost() {
            return in.matchCost();
        }
    };
}
From source file:org.hibernate.search.filter.impl.AndDocIdSet.java
License:Open Source License
private boolean iteratorAlreadyOnTargetPosition(int targetPosition, DocIdSetIterator iterator) {
    return iterator.docID() == targetPosition;
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
@Test
public void testIteratorMatchesTestArray() throws IOException {
    DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
    DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
    assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, docIdSetIterator.docID());
    assertEquals(9, docIdSetIterator.advance(9));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
/**
 * @param expected the doc id set as expected
 * @param actual the doc id set as returned by the test
 *
 * @return true if the two DocIdSet are equal: contain the same number of ids, same order and all are equal
 */
public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
    try {
        DocIdSetIterator iterA = expected.iterator();
        DocIdSetIterator iterB = actual.iterator();
        int nextA;
        int nextB;
        do {
            nextA = iterA.nextDoc();
            nextB = iterB.nextDoc();
            if (nextA != nextB) {
                return false;
            }
            assertEquals(iterA.docID(), iterB.docID());
        } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
    } catch (IOException ioe) {
        fail("these DocIdSetIterator instances should not throw any exceptions");
    }
    return true;
}
From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java
License:Apache License
@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);
    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();
    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;
        if (docId > totalMaxDoc) {
            break;
        }
        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            ;
        advancedIterator.advance(docId);
        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java
License:Open Source License
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName, int offset,
        int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
From source file:org.vootoo.search.function.ValueSourceCollectorFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(@SuppressWarnings("rawtypes") final Map context, final LeafReaderContext readerContext,
        Bits acceptDocs) throws IOException {
    collectorFilterable.doSetNextReader(context, readerContext);

    //TODO check getDocIdSet use
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {

        @Override
        public long ramBytesUsed() {
            return 0;
        }

        @Override
        public DocIdSetIterator iterator() throws IOException {
            final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc());
            // no approximation!
            TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    return collectorFilterable.matches(approximation.docID());
                }

                @Override
                public float matchCost() {
                    return 100; // TODO: use cost of ValueSourceScorer.this.matches()
                }
            };
            return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
        }

        @Override
        public Bits bits() {
            return null; // don't use random access
        }
    }, acceptDocs);
}
From source file:org.voyanttools.trombone.tool.corpus.DocumentNgrams.java
License:Open Source License
List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);
    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];

        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper, luceneDoc, lastToken);
        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }
        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);
        ngrams = filter.getFilteredNgrams(ngrams, lastToken);
        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }
    return queue.getOrderedList(start);
}