Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

In this page you can find the example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS.

Prototype

int NO_MORE_DOCS

To view the source code for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS, click Source Link.

Document

When returned by #nextDoc(), #advance(int) and #docID(), it means there are no more docs in the iterator.

Usage

From source file:org.apache.solr.util.TestOpenBitSet.java

License:Apache License

/**
 * Walks {@code a} and an {@link OpenBitSetIterator} over {@code b} in lock step and asserts that
 * both report the same sequence of set bits, ending with {@code NO_MORE_DOCS} on the iterator side
 * when {@code a} is exhausted.
 *
 * Each step randomly moves the iterator with either {@code nextDoc()} or {@code advance(int)}.
 */
void doIterate(BitSet a, OpenBitSet b) {
    int expected = -1;
    int actual = -1;
    OpenBitSetIterator it = new OpenBitSetIterator(b);
    do {
        expected = a.nextSetBit(expected + 1);
        // Randomly pick which of the two positioning calls moves the iterator forward.
        if (rand.nextBoolean()) {
            it.nextDoc();
        } else {
            it.advance(actual + 1);
        }
        // The position is always read back through docID(), whichever call moved the iterator.
        actual = it.docID();
        assertEquals(expected != -1 ? expected : DocIdSetIterator.NO_MORE_DOCS, actual);
    } while (expected >= 0);
}

From source file:org.archive.porky.FindAndIntersectionsUsingPForDeltaDocIdSetUDF.java

License:Apache License

/**
 * Intersects several bags of integer doc ids and returns the ids common to all of them.
 *
 * The first field of {@code input} is read as a bag of tuples; the first field of each of those
 * tuples is itself a bag of single-field tuples holding one Integer id. Every inner bag is loaded
 * into its own doc set, the doc sets are AND-ed together, and the surviving ids are returned as a
 * bag of single-field tuples.
 *
 * @param input the input row; may be null or empty
 * @return the bag of intersected ids, or null when {@code input} is null or has no fields
 * @throws IOException wrapping any exception raised while building or intersecting the doc sets
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0)
        return null;
    DataBag bagOfBags = (DataBag) input.get(0);
    ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
    try {
        for (Tuple t : bagOfBags) {
            DataBag bag = (DataBag) t.get(0);
            // One fresh doc set per inner bag.
            DocSet pForDeltaDocSet = DocSetFactory.getPForDeltaDocSetInstance();
            for (Tuple tup : bag) {
                // Tuples that are null or do not have exactly one field are skipped.
                if (tup != null && tup.size() == 1) {
                    pForDeltaDocSet.addDoc((Integer) tup.get(0));
                }
            }
            docs.add(pForDeltaDocSet);
        }

        // Collect the ids first so the result bag can be created with the final count.
        ArrayList<Integer> intersectedIds = new ArrayList<Integer>();
        AndDocIdSet andSet = new AndDocIdSet(docs);
        DocIdSetIterator iter = andSet.iterator();
        int docId = iter.nextDoc();
        while (docId != DocIdSetIterator.NO_MORE_DOCS) {
            intersectedIds.add(docId);
            docId = iter.nextDoc();
        }

        //return bag of intersected IDs
        TupleFactory tupleFactory = TupleFactory.getInstance();
        DataBag resultBag = new NonSpillableDataBag(intersectedIds.size());
        // Reuse the Integer already held by the list instead of unboxing it and re-boxing it
        // through the deprecated Integer(int) constructor.
        for (Integer id : intersectedIds) {
            Tuple newTuple = tupleFactory.newTuple(1);
            newTuple.set(0, id);
            resultBag.add(newTuple);
        }
        return resultBag;
    } catch (Exception e) {
        throw WrappedIOException.wrap("Caught exception processing input row ", e);
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

/**
 * Seeks every holder in {@code enums} to {@code text} and aggregates the document frequency (and,
 * when {@code docsEnumFlag == PostingsEnum.FREQS}, the total term frequency) across the holders
 * that contain the term.
 *
 * Holders without filter bits contribute the statistics reported by their terms enum. Holders with
 * filter bits are counted by walking the postings and skipping documents whose bit is not set.
 *
 * On success {@code currentDocFreq}, {@code currentTotalTermFreq} and {@code current} are updated
 * to the aggregated values; otherwise the two counters are set to {@code NOT_FOUND} and
 * {@code current} to null.
 *
 * @param text the term to look up
 * @return true if the aggregated document frequency is greater than zero, false otherwise
 * @throws IOException if an underlying terms or postings enum fails
 */
@Override
public boolean seekExact(BytesRef text) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;
    for (Holder anEnum : enums) {
        if (anEnum.termsEnum.seekExact(text)) {
            if (anEnum.bits == null) {
                // No filter bits for this holder: take the counts straight from the terms enum.
                docFreq += anEnum.termsEnum.docFreq();
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
                    // -1 is sticky in this branch: once the running total or a holder reports -1
                    // (presumably "not available" - confirm against TermsEnum), the total is
                    // pinned to -1 and this holder's value is not added.
                    if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
                        totalTermFreq = -1;
                        continue;
                    }
                    totalTermFreq += leafTotalTermFreq;
                }
            } else {
                // Filter bits present: the postings enum is cached on the holder and passed back
                // in on the next call.
                final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum,
                        docsEnumFlag);
                // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
                // NOTE(review): anEnum.bits is already known to be non-null in this branch, so the
                // null checks inside both loops below look redundant.
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    for (int docId = docsEnum
                            .nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        docFreq++;
                        // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
                        // is really 1 or unrecorded when filtering like this
                        // NOTE(review): unlike the unfiltered branch, this adds to totalTermFreq
                        // without checking whether it was previously pinned to -1 - confirm intended.
                        totalTermFreq += docsEnum.freq();
                    }
                } else {
                    for (int docId = docsEnum
                            .nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
                        docFreq++;
                    }
                }
            }
        }
    }
    // The term counts as found only if at least one (unfiltered) document carries it.
    if (docFreq > 0) {
        currentDocFreq = docFreq;
        currentTotalTermFreq = totalTermFreq;
        current = text;
        return true;
    } else {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java

License:Apache License

/**
 * Reports whether at least one live document matches the given query.
 *
 * @param searcher the searcher whose index leaves are scanned
 * @param query the query to test
 * @return true as soon as a matching document that is not deleted is found, false if none exists
 * @throws IOException if creating the weight or iterating a scorer fails
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // Scorers are walked leaf by leaf because the scorer API should stop after the first
    // match more efficiently than the bulk scorer API.
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(leaf);
        if (scorer != null) {
            final Bits liveDocs = leaf.reader().getLiveDocs();
            final DocIdSetIterator matches = scorer.iterator();
            int doc = matches.nextDoc();
            while (doc != DocIdSetIterator.NO_MORE_DOCS) {
                // A null liveDocs means every candidate is accepted without a bit lookup.
                if (liveDocs == null || liveDocs.get(doc)) {
                    return true;
                }
                doc = matches.nextDoc();
            }
        }
    }
    return false;
}

From source file:org.codelibs.elasticsearch.common.lucene.search.XMoreLikeThis.java

License:Apache License

/**
 * Accumulates the terms of {@code vector} and their summed frequencies into {@code termFreqMap}.
 * Terms rejected by {@code isNoiseWord} or {@code isSkipTerm} are ignored.
 *
 * @param termFreqMap a Map of terms and their frequencies, updated in place
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 * @throws IOException if iterating the terms or their postings fails
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName)
        throws IOException {
    final TermsEnum terms = vector.iterator();
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (BytesRef text = terms.next(); text != null; text = terms.next()) {
        scratch.copyUTF8Bytes(text);
        final String term = scratch.toString();
        if (isNoiseWord(term) || isSkipTerm(fieldName, term)) {
            continue;
        }

        // Sum the per-document frequencies of this term; a null postings enum contributes zero.
        int freq = 0;
        final PostingsEnum postings = terms.postings(null);
        if (postings != null) {
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                freq += postings.freq();
            }
        }

        // Fold the sum into the map, creating the counter on first sight of the term.
        Int counter = termFreqMap.get(term);
        if (counter != null) {
            counter.x += freq;
        } else {
            counter = new Int();
            termFreqMap.put(term, counter);
            counter.x = freq;
        }
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.uid.PerThreadIDAndVersionLookup.java

License:Apache License

/**
 * Looks up the last live document carrying the given id.
 *
 * @param id the id term to seek
 * @param liveDocs bits marking live documents, or null to accept every document
 * @param context the leaf context handed to the returned result
 * @return the doc id and its version, or null if the id is not found or has no live document
 * @throws IOException if seeking the term or reading its postings fails
 */
public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException {
    if (!termsEnum.seekExact(id)) {
        return null;
    }

    // Nested docs can yield several matching docIDs for one id; the last live one wins.
    docsEnum = termsEnum.postings(docsEnum, 0);
    int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
    int candidate;
    while ((candidate = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (liveDocs == null || liveDocs.get(candidate)) {
            lastLiveDoc = candidate;
        }
    }

    if (lastLiveDoc == DocIdSetIterator.NO_MORE_DOCS) {
        return null;
    }
    return new DocIdAndVersion(lastLiveDoc, versions.get(lastLiveDoc), context);
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.nested.ReverseNestedAggregator.java

License:Apache License

/**
 * Builds a leaf collector that maps each collected child doc to its parent doc and forwards the
 * parent to {@code sub}, at most once per bucket for any given parent.
 *
 * @param ctx the leaf whose parent bit set is looked up
 * @param sub the sub collector that receives the parent docs
 * @return the collector, or {@code LeafBucketCollector.NO_OP_COLLECTOR} if the leaf has no parent bit set
 * @throws IOException if the parent bit set cannot be produced
 */
@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    // In ES, deleting a parent also deletes its children, so every child doc this agg receives
    // belongs to a parent doc that is alive. For this reason acceptedDocs can be null here.
    final BitSet parentDocs = parentBitsetProducer.getBitSet(ctx);
    if (parentDocs == null) {
        return LeafBucketCollector.NO_OP_COLLECTOR;
    }
    // Per bucket ordinal, the last parent doc that was forwarded to the sub collector.
    final LongIntHashMap lastParentByBucket = new LongIntHashMap(32);
    return new LeafBucketCollectorBase(sub, null) {
        @Override
        public void collect(int childDoc, long bucket) throws IOException {
            // The parent is the first set bit at or after the child doc.
            final int parentDoc = parentDocs.nextSetBit(childDoc);
            assert childDoc <= parentDoc && parentDoc != DocIdSetIterator.NO_MORE_DOCS;

            final int slot = lastParentByBucket.indexOf(bucket);
            if (!lastParentByBucket.indexExists(slot)) {
                // First parent seen for this bucket.
                collectBucket(sub, parentDoc, bucket);
                lastParentByBucket.indexInsert(slot, bucket, parentDoc);
                return;
            }

            // Only forward a parent that lies beyond the one already collected for this bucket.
            final int previousParent = lastParentByBucket.indexGet(slot);
            if (parentDoc > previousParent) {
                collectBucket(sub, parentDoc, bucket);
                lastParentByBucket.indexReplace(slot, parentDoc);
            }
        }
    };
}

From source file:org.cosmo.common.model.PublicFolder.java

License:Apache License

/**
 * Collects the distinct item record ids found under the given root folder.
 *
 * @param rootFolder the folder XML passed on to {@code getAllItemRecordIdSet}
 * @return the record ids, in the order the bit set iterator yields them
 * @throws RuntimeException wrapping any IOException raised while iterating the id set
 */
public static long[] getAllItemRecordIds(org.cosmo.common.xml.XML rootFolder) {
    try {
        // A bit set tracks duplicates - faster than a HashSet.
        OpenBitSet uniqueIds = getAllItemRecordIdSet(rootFolder);
        LongArrayList collected = new LongArrayList((int) uniqueIds.cardinality());
        DocIdSetIterator cursor = uniqueIds.iterator();
        int id;
        while ((id = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            collected.add((long) id);
        }
        return collected.elements();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:org.cosmo.common.record.SearchResult.java

License:Apache License

/**
 * Gathers the doc ids of the search result into a list and then unconditionally throws, because
 * the sorting step that used to follow is disabled pending a refactoring fix.
 *
 * @param load not read by the current body
 * @return never returns normally in its current form
 * @throws Exception always a RuntimeException once the ids are gathered; earlier failures from
 *         the iterator propagate as well
 */
public long[] records(boolean load) throws Exception {
    if (Log.jlucene.getLevel() == java.util.logging.Level.FINE) {
        Log.jlucene.fine(Fmt.S("%s Start Fetching Records", Clock.timer().markAndCheckRunning()));
    }

    DocIdSetIterator hits = _result.iterator();
    // Only needed by the disabled assertAndCorrectIds() call below.
    Search search = _context._searchField._declaringMeta.search();

    LongArrayList ids = new LongArrayList();
    int docId;
    while ((docId = hits.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        ids.add((long) docId);
    }
    ids.trimToSize();

    // XXX REMOVE assertAndCorrectIds() ONCE BUG IS FIXED!!!!
    //assertAndCorrectIds(search, ids);
    //Sorting.quickSort(ids.elements(), 0, ids.size(), IdComparator);

    // The original tail of this method is kept in the comment after the throw until it is fixed.
    throw new RuntimeException("fix below commenouted out line due to refactoring");
    /*Sorting.mergeSort(ids.elements(), 0, ids.size(), RssContent.PubDateComparator.Instance);

    if (Log.jlucene.getLevel() == ariba.util.log.Log.DebugLevel) {
      Log.jlucene.debug("%s Done Fetching Records", Clock.timer().markAndCheckRunning());
    }
    return ids.elements();
    */
}

From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java

License:Apache License

/**
 * Writes the frequency of the current term, whose postings are expected to hold exactly one
 * document (checked by assertions only).
 *
 * @param iterator the terms enum positioned on the term to write
 * @param docsEnum a postings enum offered to {@code iterator.postings} for reuse
 * @return the postings enum obtained from {@code iterator}
 * @throws IOException if reading the postings or writing the frequency fails
 */
private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException {
    final PostingsEnum postings = iterator.postings(docsEnum);
    final int onlyDoc = postings.nextDoc();
    assert onlyDoc != DocIdSetIterator.NO_MORE_DOCS;
    writeFreq(postings.freq());
    // A second advance must exhaust the postings.
    final int afterOnlyDoc = postings.nextDoc();
    assert afterOnlyDoc == DocIdSetIterator.NO_MORE_DOCS;
    return postings;
}