Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS


Introduction

This page collects example usages of org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS.

Prototype

public static final int NO_MORE_DOCS = Integer.MAX_VALUE;

Document

When returned by nextDoc(), advance(int), or docID(), it means there are no more docs in the iterator.
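
This sentinel is what makes the standard consumption loop uniform: iterate until nextDoc() returns it. A minimal sketch of the idiom (the class and method names here are ours, for illustration only):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

final class NoMoreDocsSketch {
    // Exhausts any DocIdSetIterator, counting its documents.
    static int countDocs(DocIdSetIterator it) throws IOException {
        int count = 0;
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            count++;
        }
        // Once exhausted, docID() keeps reporting NO_MORE_DOCS as well.
        return count;
    }
}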

Usage

From source file:DocIdSetBenchmark.java

License:Apache License

private static int exhaustIterator(DocIdSet set) throws IOException {
    int dummy = 0;
    final DocIdSetIterator it = set.iterator();
    // Walk every doc with nextDoc(); summing into 'dummy' keeps the JIT from
    // optimizing the benchmark loop away.
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        dummy += doc;
    }
    return dummy;
}

From source file:DocIdSetBenchmark.java

License:Apache License

private static int exhaustIterator(DocIdSet set, int increment) throws IOException {
    int dummy = 0;
    final DocIdSetIterator it = set.iterator();
    // Step through the set with advance(); starting from -1, the first target is
    // (increment - 1). Summing into 'dummy' again defeats dead-code elimination.
    for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.advance(doc + increment)) {
        dummy += doc;
    }
    return dummy;
}
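
For reference, advance(target) positions the iterator on the first document whose id is at or beyond target, returning NO_MORE_DOCS once the set is exhausted; per the Lucene javadoc, behavior is undefined when the target is not beyond the current doc. A minimal hedged sketch (the helper name is ours, not Lucene's):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

final class AdvanceSketch {
    // Returns the first doc id >= target, or -1 once the iterator is exhausted.
    // Per the contract, callers must pass a target beyond the current doc.
    static int firstDocAtOrAfter(DocIdSetIterator it, int target) throws IOException {
        int doc = it.advance(target);
        return doc == DocIdSetIterator.NO_MORE_DOCS ? -1 : doc;
    }
}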

From source file:arena.lucene.LuceneIndexSearcherImpl.java

License:Open Source License

protected TopDocs executeSearch(IndexSearcher searcher, Query query, Filter filter, Sort sort,
        int collectorLimit) throws IOException {
    // Decide on how to search based on which elements of the lucene query model are available
    if (query != null) {
        // Full scoring search
        TopDocsCollector<? extends ScoreDoc> collector = null;
        if (sort == null) {
            collector = TopScoreDocCollector.create(collectorLimit, true);
        } else {
            SortField[] sortFields = sort.getSort();
            if (sortFields != null && sortFields.length > 0 && sortFields[0].getType() == SortField.SCORE
                    && !sortFields[0].getReverse()) {
                collector = TopScoreDocCollector.create(collectorLimit, true);
            } else {
                collector = TopFieldCollector.create(sort, collectorLimit, false, true, true, true);
            }
        }
        searcher.search(query, filter, collector);
        return collector.topDocs();

    } else if (filter != null) {
        // No query means no scoring is needed: just dump the results into a hit
        // collector that walks the matches in the order we want.
        DocIdSetIterator filterMatchesIterator = filter.getDocIdSet(searcher.getIndexReader()).iterator();
        if (sort == null) {
            // No sort available, so the natural iteration order is fine: the iterator
            // already yields docs in order, so just pull the first n rows into the output.
            ScoreDoc[] scoreDocs = new ScoreDoc[collectorLimit];
            int found = 0;
            int docId;
            while (found < collectorLimit
                    && (docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                scoreDocs[found++] = new ScoreDoc(docId, 1f);
            }
            return new TopDocs(found, found < collectorLimit ? Arrays.copyOf(scoreDocs, found) : scoreDocs, 1f);
        } else {
            TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit,
                    false, true, true, true);
            int docId;
            while ((docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                collector.collect(docId);
            }
            return collector.topDocs();

        }
    } else if (sort != null) {
        // No query and no filter, so there is no score; collect every doc in the index for non-score sorting.
        TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit, false,
                true, true, true);
        int numDocs = searcher.getIndexReader().numDocs();
        for (int n = 0; n < numDocs; n++) {
            collector.collect(n);
        }
        return collector.topDocs();
    } else {
        // No query, filter, or sort: return the first n docs with a constant score.
        ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(collectorLimit, searcher.getIndexReader().numDocs())];

        for (int n = 0; n < scoreDocs.length; n++) {
            scoreDocs[n] = new ScoreDoc(n, 1f);
        }
        return new TopDocs(scoreDocs.length, scoreDocs, 1f);
    }
}

From source file:BlockBuilding.AbstractBlockBuilding.java

License:Apache License

protected Map<String, int[]> parseD1Index(IndexReader d1Index, IndexReader d2Index) {
    try {
        int[] documentIds = getDocumentIds(d1Index);
        final Map<String, int[]> hashedBlocks = new HashMap<>();
        Fields fields = MultiFields.getFields(d1Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                // check whether the term also appears in the other index; skip it if not
                int d2DocFrequency = d2Index.docFreq(new Term(field, text));
                if (d2DocFrequency == 0) {
                    continue;
                }

                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d1Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }

                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                hashedBlocks.put(text.utf8ToString(), idsArray);
            }
        }
        return hashedBlocks;
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:BlockBuilding.AbstractBlockBuilding.java

License:Apache License

protected void parseD2Index(IndexReader d2Index, Map<String, int[]> hashedBlocks) {
    try {
        int[] documentIds = getDocumentIds(d2Index);
        Fields fields = MultiFields.getFields(d2Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                if (!hashedBlocks.containsKey(text.utf8ToString())) {
                    continue;
                }

                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d2Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }

                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                int[] d1Entities = hashedBlocks.get(text.utf8ToString());
                blocks.add(new BilateralBlock(d1Entities, idsArray));
            }
        }

    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}

From source file:BlockBuilding.AbstractBlockBuilding.java

License:Apache License

protected void parseIndex(IndexReader d1Index) {
    try {
        int[] documentIds = getDocumentIds(d1Index);
        Fields fields = MultiFields.getFields(d1Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                if (termsEnum.docFreq() < 2) {
                    continue;
                }

                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d1Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }

                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                UnilateralBlock block = new UnilateralBlock(idsArray);
                blocks.add(block);
            }
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}

From source file:BlockBuilding.SortedNeighborhoodBlocking.java

License:Apache License

protected List<Integer> getTermEntities(int[] docIds, IndexReader iReader, String blockingKey) {
    try {
        Term term = new Term(VALUE_LABEL, blockingKey);
        List<Integer> entityIds = new ArrayList<>();
        int docFrequency = iReader.docFreq(term);
        if (0 < docFrequency) {
            BytesRef text = term.bytes();
            PostingsEnum pe = MultiFields.getTermDocsEnum(iReader, VALUE_LABEL, text);
            int doc;
            while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                entityIds.add(docIds[doc]);
            }
        }

        return entityIds;
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:cn.hbu.cs.esearch.document.UIDDocIdSet.java

License:Apache License

@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < sorted.length - 1) {
                current++;
                doc = sorted[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = Arrays.binarySearch(sorted, target);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= sorted.length) {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            current = idx;
            doc = sorted[current];
            return doc;
        }

        // Unused; implemented only to satisfy the abstract method.
        @Override
        public long cost() {
            return 0;
        }
    };
}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

@Override
protected void persistDelete(long uid) throws IOException {
    final int docid = mapDocId(uid);
    if (docid < 0) {
        return;
    }

    Query deleteQ = new ConstantScoreQuery(new Filter() {

        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext readerCtx, Bits acceptedDocs) throws IOException {
            return new DocIdSet() {

                @Override
                public DocIdSetIterator iterator() throws IOException {
                    return new DocIdSetIterator() {
                        int currId = -1;

                        @Override
                        public int nextDoc() throws IOException {
                            if (currId == -1) {
                                currId = docid;
                            } else {
                                currId = DocIdSetIterator.NO_MORE_DOCS;
                            }
                            return currId;
                        }

                        @Override
                        public int docID() {
                            return currId;
                        }

                        @Override
                        public int advance(int target) throws IOException {
                            if (currId != DocIdSetIterator.NO_MORE_DOCS) {
                                if (target < docid) {
                                    currId = docid;
                                } else {
                                    currId = DocIdSetIterator.NO_MORE_DOCS;
                                }
                            }
                            return currId;
                        }

                        @Override
                        public long cost() {
                            // No meaningful cost estimate; this iterator matches a single doc.
                            return 0;
                        }
                    };
                }

            };
        }

    });
    indexWriter.deleteDocuments(deleteQ);
    if (currentReaderData != null) {
        currentReaderData.uidMap.remove(uid);
    }

}

From source file:cn.hbu.cs.esearch.util.ArrayDocIdSet.java

License:Apache License

@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;
        int largest = lengthMinusone;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < lengthMinusone) {
                current++;
                doc = docIds[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = current < 0 ? binarySearch(docIds, target)
                    : binarySearch(docIds, target, current, largest);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= docIds.length) {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            current = idx;
            doc = docIds[current];
            return doc;
        }

        @Override
        public long cost() {
            // No cost estimate is computed for this array-backed iterator.
            return 0;
        }
    };
}