Example usage for org.apache.lucene.search DocIdSetIterator docID

Introduction

On this page you can find example usage for org.apache.lucene.search DocIdSetIterator docID.

Prototype

public abstract int docID();

Document

Returns the following:
  • -1 if #nextDoc() or #advance(int) were not called yet.
  • #NO_MORE_DOCS if the iterator has exhausted.
  • Otherwise it should return the doc ID it is currently on.
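
The contract is easiest to see on a trivial iterator. The following is a minimal sketch (not taken from any of the source files below); it assumes a Lucene version that provides DocIdSetIterator.all(int), the same factory used in the ValueSourceCollectorFilter example, and the class name is only for illustration:

    import java.io.IOException;

    import org.apache.lucene.search.DocIdSetIterator;

    public class DocIdContractSketch {
        public static void main(String[] args) throws IOException {
            // An iterator over every doc id in [0, 5); the docID() contract holds for any DocIdSetIterator.
            DocIdSetIterator it = DocIdSetIterator.all(5);

            // Unpositioned: neither nextDoc() nor advance(int) has been called yet.
            System.out.println(it.docID()); // -1

            int doc;
            while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                // While positioned, docID() reports the doc that nextDoc()/advance(int) just returned.
                System.out.println(doc == it.docID()); // true
            }

            // Once the iterator is exhausted, docID() stays at NO_MORE_DOCS.
            System.out.println(it.docID() == DocIdSetIterator.NO_MORE_DOCS); // true
        }
    }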

    Usage

    From source file: org.elasticsearch.search.profile.ProfileScorer.java

    License: Apache License

    @Override
    public TwoPhaseIterator twoPhaseIterator() {
        final TwoPhaseIterator in = scorer.twoPhaseIterator();
        if (in == null) {
            return null;
        }
        final DocIdSetIterator inApproximation = in.approximation();
        final DocIdSetIterator approximation = new DocIdSetIterator() {
    
            @Override
            public int advance(int target) throws IOException {
                profile.startTime(ProfileBreakdown.TimingType.ADVANCE);
                try {
                    return inApproximation.advance(target);
                } finally {
                    profile.stopAndRecordTime();
                }
            }
    
            @Override
            public int nextDoc() throws IOException {
                profile.startTime(ProfileBreakdown.TimingType.NEXT_DOC);
                try {
                    return inApproximation.nextDoc();
                } finally {
                    profile.stopAndRecordTime();
                }
            }
    
            @Override
            public int docID() {
                return inApproximation.docID();
            }
    
            @Override
            public long cost() {
                return inApproximation.cost();
            }
        };
        return new TwoPhaseIterator(approximation) {
            @Override
            public boolean matches() throws IOException {
                profile.startTime(ProfileBreakdown.TimingType.MATCH);
                try {
                    return in.matches();
                } finally {
                    profile.stopAndRecordTime();
                }
            }
    
            @Override
            public float matchCost() {
                return in.matchCost();
            }
        };
    }
    

    From source file: org.hibernate.search.filter.impl.AndDocIdSet.java

    License: Open Source License

    private boolean iteratorAlreadyOnTargetPosition(int targetPosition, DocIdSetIterator iterator) {
        return iterator.docID() == targetPosition;
    }
    

    From source file: org.hibernate.search.test.filter.AndDocIdSetsTest.java

    License: Open Source License

    @Test
    public void testIteratorMatchesTestArray() throws IOException {
        DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
        DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
        assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, docIdSetIterator.docID());
        assertEquals(9, docIdSetIterator.advance(9));
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
    }
    

    From source file: org.hibernate.search.test.filter.AndDocIdSetsTest.java

    License: Open Source License

    /**
     * @param expected the doc id set as expected
     * @param actual the doc id test as returned by the test
     *
     * @return true if the two DocIdSet are equal: contain the same number of ids, same order and all are equal
     */
    public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
        try {
            DocIdSetIterator iterA = expected.iterator();
            DocIdSetIterator iterB = actual.iterator();
            int nextA;
            int nextB;
            do {
                nextA = iterA.nextDoc();
                nextB = iterB.nextDoc();
                if (nextA != nextB) {
                    return false;
                }
                assertEquals(iterA.docID(), iterB.docID());
            } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
        } catch (IOException ioe) {
            fail("these DocIdSetIterator instances should not throw any exceptions");
        }
        return true;
    }
    

    From source file: org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java

    License: Apache License

    @Test
    public void testAdvance() throws IOException {
        Random rand = new Random(13);
    
        int[] maxDoc = new int[NUM_BITSETS];
        OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
        for (int i = 0; i < NUM_BITSETS; i++) {
            OpenBitSet bitset = bitsets[i] = new OpenBitSet();
            for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
                if (rand.nextInt(5) == 0) {
                    bitset.set(j);
                }
            }
            maxDoc[i] = NUM_DOCS_IN_BITSET;
        }
        int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;
    
        // compare nextDoc invocations with advance
        MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
        final DocIdSetIterator simpleIterator = docIdSet.iterator();
        final DocIdSetIterator advancedIterator = docIdSet.iterator();
    
        int docId = 0;
        while (true) {
            final int delta = rand.nextInt(CHECK_INTERVAL);
            docId = docId + delta + 1;
    
            if (docId > totalMaxDoc) {
                break;
            }
    
            while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                ;
    
            advancedIterator.advance(docId);
    
            assertEquals(simpleIterator.docID(), advancedIterator.docID());
        }
    }
    

    From source file: org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

    License: Open Source License

    /**
     * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>. The field must have at most one indexed
     * token per document.
     */
    public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
            int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
            ExtendedPropertyDefinition epd) throws IOException {
        // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
        // we should use an alternate strategy to avoid
        // 1) creating another huge int[] for the counts
        // 2) looping over that huge int[] looking for the rare non-zeros.
        //
        // Yet another variation: if docs.size() is small and termvectors are stored,
        // then use them instead of the FieldCache.
        //
    
        // TODO: this function is too big and could use some refactoring, but
        // we also need a facet cache, and refactoring of SimpleFacets instead of
        // trying to pass all the various params around.
        FieldType ft = getType(epd);
        NamedList<Object> res = new NamedList<Object>();
    
        FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
        final String[] terms = si.lookup;
        final int[] termNum = si.order;
    
        if (prefix != null && prefix.length() == 0)
            prefix = null;
    
        int startTermIndex, endTermIndex;
        if (prefix != null) {
            startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
            if (startTermIndex < 0)
                startTermIndex = -startTermIndex - 1;
            // find the end term. \uffff isn't a legal unicode char, but only compareTo
            // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
            endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
            endTermIndex = -endTermIndex - 1;
        } else {
            startTermIndex = 1;
            endTermIndex = terms.length;
        }
    
        final int nTerms = endTermIndex - startTermIndex;
    
        if (nTerms > 0 && docs.size() >= mincount) {
    
            // count collection array only needs to be as big as the number of terms we are
            // going to collect counts for.
            final int[] counts = new int[nTerms];
    
            DocIdSetIterator iter = docs.iterator();
            while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                int term = termNum[iter.docID()];
                int arrIdx = term - startTermIndex;
                if (arrIdx >= 0 && arrIdx < nTerms)
                    counts[arrIdx]++;
            }
    
            // IDEA: we could also maintain a count of "other"... everything that fell outside
            // of the top 'N'
    
            int off = offset;
            int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
    
            if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
                int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
                maxsize = Math.min(maxsize, nTerms);
                final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
                int min = mincount - 1; // the smallest value in the top 'N' values
                for (int i = 0; i < nTerms; i++) {
                    int c = counts[i];
                    if (c > min) {
                        // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                        // index order, so we already know that the keys are ordered. This can be very
                        // important if a lot of the counts are repeated (like zero counts would be).
                        queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                        if (queue.size() >= maxsize) {
                            break;
                        }
                    }
                }
                // now select the right page from the results
                for (SimpleFacets.CountPair<String, Integer> p : queue) {
                    if (--off >= 0)
                        continue;
                    if (--lim < 0)
                        break;
                    res.add(ft.indexedToReadable(p.key), p.val);
                }
            } else {
                // add results in index order
                int i = 0;
                if (mincount <= 0) {
                    // if mincount<=0, then we won't discard any terms and we know exactly
                    // where to start.
                    i = off;
                    off = 0;
                }
    
                for (; i < nTerms; i++) {
                    int c = counts[i];
                    if (c < mincount || --off >= 0)
                        continue;
                    if (--lim < 0)
                        break;
                    res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
                }
            }
        }
    
        if (missing) {
            res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
        }
    
        return res;
    }
    

    From source file: org.vootoo.search.function.ValueSourceCollectorFilter.java

    License: Apache License

    @Override
    public DocIdSet getDocIdSet(@SuppressWarnings("rawtypes") final Map context,
            final LeafReaderContext readerContext, Bits acceptDocs) throws IOException {
        collectorFilterable.doSetNextReader(context, readerContext);
        //TODO  check getDocIdSet use
        return BitsFilteredDocIdSet.wrap(new DocIdSet() {
            @Override
            public long ramBytesUsed() {
                return 0;
            }
    
            @Override
            public DocIdSetIterator iterator() throws IOException {
                final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc()); // no approximation!
                TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                    @Override
                    public boolean matches() throws IOException {
                        return collectorFilterable.matches(approximation.docID());
                    }
    
                    @Override
                    public float matchCost() {
                        return 100; // TODO: use cost of ValueSourceScorer.this.matches()
                    }
                };
                return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
            }
    
            @Override
            public Bits bits() {
                return null; // don't use random access
            }
        }, acceptDocs);
    }
    

    From source file: org.voyanttools.trombone.tool.corpus.DocumentNgrams.java

    License: Open Source License

    List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
        Corpus corpus = corpusMapper.getCorpus();
        int[] totalTokens = corpus.getLastTokenPositions(tokenType);
        FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);
    
        Set<String> validIds = new HashSet<String>();
        validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
        OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
        DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
        while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int luceneDoc = it.docID();
            String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
            if (validIds.contains(docId) == false) {
                continue;
            }
            int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
            int lastToken = totalTokens[corpusDocumentIndex];
    
            // build single grams as seed for ngrams
            SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper,
                    luceneDoc, lastToken);
    
            Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
            for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
                if (sparseSimplifiedTermInfoArray[i] != null
                        && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                    if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                        List<int[]> l = new ArrayList<int[]>();
                        l.add(new int[] { i, i });
                        stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                    } else {
                        stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                    }
                }
            }
    
            List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
            ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);
    
            ngrams = filter.getFilteredNgrams(ngrams, lastToken);
    
            for (DocumentNgram ngram : ngrams) {
                if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                    queue.offer(ngram);
                }
            }
        }
    
        return queue.getOrderedList(start);
    
    }