Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

On this page you can find example usages of org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS.

Prototype

public static final int NO_MORE_DOCS

Document

When returned by #nextDoc(), #advance(int), or #docID(), it means there are no more docs in the iterator.
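
All of the snippets below follow the same exhaustion check. The following is a minimal, self-contained sketch of that idiom (the class name is only for illustration); it assumes a Lucene version that ships org.apache.lucene.util.FixedBitSet and BitSetIterator (5.x or later), which are used here purely as a convenient way to obtain a DocIdSetIterator.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class NoMoreDocsSketch {
    public static void main(String[] args) throws IOException {
        // A small bit set standing in for the documents that match some query.
        FixedBitSet bits = new FixedBitSet(10);
        bits.set(1);
        bits.set(4);
        bits.set(7);

        // BitSetIterator exposes the bit set as a DocIdSetIterator.
        DocIdSetIterator disi = new BitSetIterator(bits, bits.cardinality());

        // The canonical loop: nextDoc() returns doc ids in increasing order
        // and NO_MORE_DOCS once the iterator is exhausted.
        int doc;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.println("matched doc " + doc);
        }

        // After exhaustion, docID() keeps reporting NO_MORE_DOCS.
        System.out.println(disi.docID() == DocIdSetIterator.NO_MORE_DOCS); // prints true
    }
}

Because NO_MORE_DOCS is defined as Integer.MAX_VALUE, an exhausted iterator sorts after every real document id, which is why comparison-based merging code can use the sentinel value directly.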

Usage

From source file: org.apache.solr.request.DocValuesStats.java

License: Apache License

/** accumulates per-segment single-valued stats */
static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si,
        DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
    int missingDocCount = 0;
    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        int term = si.getOrd(doc);
        if (term >= 0) {
            if (map != null) {
                term = (int) ordMap.get(term);
            }
            counts[term]++;
            for (FieldFacetStats f : facetStats) {
                f.facetTermNum(docBase + doc, term);
            }
        } else {
            for (FieldFacetStats f : facetStats) {
                f.facetMissingNum(docBase + doc);
            }

            missingDocCount++;
        }
    }
    return missingDocCount;
}

From source file: org.apache.solr.request.DocValuesStats.java

License: Apache License

/** accumulates per-segment multi-valued stats */

static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si,
        DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
    int missingDocCount = 0;
    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        si.setDocument(doc);
        long ord;
        boolean emptyTerm = true;
        while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            emptyTerm = false;
            int term = (int) ord;
            if (map != null) {
                term = (int) ordMap.get(term);
            }
            counts[term]++;
            for (FieldFacetStats f : facetStats) {
                f.facetTermNum(docBase + doc, term);
            }
        }
        if (emptyTerm) {
            for (FieldFacetStats f : facetStats) {
                f.facetMissingNum(docBase + doc);
            }

            missingDocCount++;
        }
    }

    return missingDocCount;
}

From source file: org.apache.solr.request.IntervalFacets.java

License: Apache License

private void accumIntervalsMulti(SortedSetDocValues ssdv, DocIdSetIterator disi, Bits bits) throws IOException {
    // First update the ordinals in the intervals for this segment
    for (FacetInterval interval : intervals) {
        interval.updateContext(ssdv);
    }

    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (bits != null && bits.get(doc) == false) {
            continue;
        }
        ssdv.setDocument(doc);
        long currOrd;
        int currentInterval = 0;
        while ((currOrd = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            boolean evaluateNextInterval = true;
            while (evaluateNextInterval && currentInterval < intervals.length) {
                IntervalCompareResult result = intervals[currentInterval].includes(currOrd);
                switch (result) {
                case INCLUDED:
                    /*
                     * Increment the current interval and move to the next one using
                     * the same value
                     */
                    intervals[currentInterval].incCount();
                    currentInterval++;
                    break;
                case LOWER_THAN_START:
                    /*
                     * None of the next intervals will match this value (all of them have 
                     * higher start value). Move to the next value for this document. 
                     */
                    evaluateNextInterval = false;
                    break;
                case GREATER_THAN_END:
                    /*
                     * Next interval may match this value
                     */
                    currentInterval++;
                    break;
                }
            }
        }
    }
}

From source file: org.apache.solr.request.IntervalFacets.java

License: Apache License

private void accumIntervalsSingle(SortedDocValues sdv, DocIdSetIterator disi, Bits bits) throws IOException {
    // First update the ordinals in the intervals to this segment
    for (FacetInterval interval : intervals) {
        interval.updateContext(sdv);
    }
    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (bits != null && bits.get(doc) == false) {
            continue;
        }
        int ord = sdv.getOrd(doc);
        if (ord >= 0) {
            accumInterval(ord);
        }
    }
}

From source file: org.apache.solr.request.SimpleFacets.java

License: Apache License

/**
 * Returns a list of terms in the specified field along with the 
 * corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code>
 * and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {

    /* :TODO: potential optimization...
    * cache the Terms with the highest docFreq and try them first
    * don't enum if we get our max from them
    */

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    // make sure we have a set that is fast for random access, if we will use it for that
    DocSet fastForRandomSet = docs;
    if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
        SortedIntDocSet sset = (SortedIntDocSet) docs;
        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
    }

    IndexSchema schema = searcher.getSchema();
    AtomicReader r = searcher.getAtomicReader();
    FieldType ft = schema.getFieldType(field);

    boolean sortByCount = sort.equals("count") || sort.equals("true");
    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount
            ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize)
            : null;
    final NamedList<Integer> res = new NamedList<Integer>();

    int min = mincount - 1; // the smallest value in the top 'N' values    
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    BytesRef startTermBytes = null;
    if (prefix != null) {
        String indexedPrefix = ft.toInternal(prefix);
        startTermBytes = new BytesRef(indexedPrefix);
    }

    Fields fields = r.fields();
    Terms terms = fields == null ? null : fields.terms(field);
    TermsEnum termsEnum = null;
    SolrIndexSearcher.DocsEnumState deState = null;
    BytesRef term = null;
    if (terms != null) {
        termsEnum = terms.iterator(null);

        // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
        // facet.offset when sorting by index order.

        if (startTermBytes != null) {
            if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }
    }

    DocsEnum docsEnum = null;
    CharsRef charsRef = new CharsRef(10);

    if (docs.size() >= mincount) {
        while (term != null) {

            if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes))
                break;

            int df = termsEnum.docFreq();

            // If we are sorting, we can use df>min (rather than >=) since we
            // are going in index order.  For certain term distributions this can
            // make a large difference (for example, many terms with df=1).
            if (df > 0 && df > min) {
                int c;

                if (df >= minDfFilterCache) {
                    // use the filter cache

                    if (deState == null) {
                        deState = new SolrIndexSearcher.DocsEnumState();
                        deState.fieldName = field;
                        deState.liveDocs = r.getLiveDocs();
                        deState.termsEnum = termsEnum;
                        deState.docsEnum = docsEnum;
                    }

                    c = searcher.numDocs(docs, deState);

                    docsEnum = deState.docsEnum;
                } else {
                    // iterate over TermDocs to calculate the intersection

                    // TODO: specialize when base docset is a bitset or hash set (skipDocs)?  or does it matter for this?
                    // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
                    // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
                    docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                    c = 0;

                    if (docsEnum instanceof MultiDocsEnum) {
                        MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
                        int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
                        for (int subindex = 0; subindex < numSubs; subindex++) {
                            MultiDocsEnum.EnumWithSlice sub = subs[subindex];
                            if (sub.docsEnum == null)
                                continue;
                            int base = sub.slice.start;
                            int docid;
                            while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base))
                                    c++;
                            }
                        }
                    } else {
                        int docid;
                        while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid))
                                c++;
                        }
                    }

                }

                if (sortByCount) {
                    if (c > min) {
                        BytesRef termCopy = BytesRef.deepCopyOf(term);
                        queue.add(new CountPair<BytesRef, Integer>(termCopy, c));
                        if (queue.size() >= maxsize)
                            min = queue.last().val;
                    }
                } else {
                    if (c >= mincount && --off < 0) {
                        if (--lim < 0)
                            break;
                        ft.indexedToReadable(term, charsRef);
                        res.add(charsRef.toString(), c);
                    }
                }
            }

            term = termsEnum.next();
        }
    }

    if (sortByCount) {
        for (CountPair<BytesRef, Integer> p : queue) {
            if (--off >= 0)
                continue;
            if (--lim < 0)
                break;
            ft.indexedToReadable(p.key, charsRef);
            res.add(charsRef.toString(), p.val);
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, field));
    }

    return res;
}

From source file: org.apache.solr.response.SortingResponseWriter.java

License: Apache License

public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
    Exception e1 = res.getException();
    if (e1 != null) {
        e1.printStackTrace(new PrintWriter(writer));
        return;
    }
    SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
    SortSpec sortSpec = info.getResponseBuilder().getSortSpec();

    if (sortSpec == null) {
        throw new IOException(new SyntaxError("No sort criteria was provided."));
    }

    SolrIndexSearcher searcher = req.getSearcher();
    Sort sort = searcher.weightSort(sortSpec.getSort());

    if (sort == null) {
        throw new IOException(new SyntaxError("No sort criteria was provided."));
    }

    if (sort.needsScores()) {
        throw new IOException(new SyntaxError("Scoring is not currently supported with xsort."));
    }

    FixedBitSet[] sets = (FixedBitSet[]) req.getContext().get("export");
    Integer th = (Integer) req.getContext().get("totalHits");

    if (sets == null) {
        throw new IOException(new SyntaxError("xport RankQuery is required for xsort: rq={!xport}"));
    }

    int totalHits = th.intValue();
    SolrParams params = req.getParams();
    String fl = params.get("fl");

    if (fl == null) {
        throw new IOException(new SyntaxError("export field list (fl) must be specified."));
    }

    String[] fields = fl.split(",");

    for (int i = 0; i < fields.length; i++) {
        if (fields[i].trim().equals("score")) {
            throw new IOException(new SyntaxError("Scoring is not currently supported with xsort."));
        }
    }

    FieldWriter[] fieldWriters = getFieldWriters(fields, req.getSearcher());
    writer.write(
            "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":" + totalHits + ", \"docs\":[");

    //Write the data.
    List<AtomicReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
    SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
    int count = 0;
    int queueSize = 30000;
    SortQueue queue = new SortQueue(queueSize, sortDoc);
    SortDoc[] outDocs = new SortDoc[queueSize];

    boolean commaNeeded = false;
    while (count < totalHits) {
        //long begin = System.nanoTime();
        queue.reset();
        SortDoc top = queue.top();
        for (int i = 0; i < leaves.size(); i++) {
            sortDoc.setNextReader(leaves.get(i));
            DocIdSetIterator it = sets[i].iterator();
            int docId = -1;
            while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                sortDoc.setValues(docId);
                if (top.lessThan(sortDoc)) {
                    top.setValues(sortDoc);
                    top = queue.updateTop();
                }
            }
        }

        int outDocsIndex = -1;

        for (int i = 0; i < queueSize; i++) {
            SortDoc s = queue.pop();
            if (s.docId > -1) {
                outDocs[++outDocsIndex] = s;
            }
        }

        //long end = System.nanoTime();

        count += (outDocsIndex + 1);

        try {
            for (int i = outDocsIndex; i >= 0; --i) {
                SortDoc s = outDocs[i];
                if (commaNeeded) {
                    writer.write(',');
                }
                writer.write('{');
                writeDoc(s, leaves, fieldWriters, sets, writer);
                writer.write('}');
                commaNeeded = true;
                s.reset();
            }
        } catch (Throwable e) {
            Throwable ex = e;
            while (ex != null) {
                String m = ex.getMessage();
                if (m != null && m.contains("Broken pipe")) {
                    logger.info("Early client disconnect during export");
                    return;
                }
                ex = ex.getCause();
            }

            if (e instanceof IOException) {
                throw ((IOException) e);
            } else {
                throw new IOException(e);
            }
        }
    }

    //System.out.println("Sort Time 2:"+Long.toString(total/1000000));
    writer.write("]}}");
    writer.flush();
}

From source file: org.apache.solr.schema.TestPointFields.java

License: Apache License

private void doTestInternals(String field, String[] values) throws IOException {
    assertTrue(h.getCore().getLatestSchema().getField(field).getType() instanceof PointField);
    for (int i = 0; i < 10; i++) {
        assertU(adoc("id", String.valueOf(i), field, values[i]));
    }
    assertU(commit());
    IndexReader ir;
    RefCounted<SolrIndexSearcher> ref = null;
    SchemaField sf = h.getCore().getLatestSchema().getField(field);
    boolean ignoredField = !(sf.indexed() || sf.stored() || sf.hasDocValues());
    try {
        ref = h.getCore().getSearcher();
        SolrIndexSearcher searcher = ref.get();
        ir = searcher.getIndexReader();
        // our own SlowCompositeReader to check DocValues on disk w/o the UninvertingReader added by SolrIndexSearcher
        final LeafReader leafReaderForCheckingDVs = SlowCompositeReaderWrapper.wrap(searcher.getRawReader());

        if (sf.indexed()) {
            assertEquals("Field " + field + " should have point values", 10, PointValues.size(ir, field));
        } else {
            assertEquals("Field " + field + " should have no point values", 0, PointValues.size(ir, field));
        }
        if (ignoredField) {
            assertTrue("Field " + field + " should not have docValues",
                    DocValues.getSortedNumeric(leafReaderForCheckingDVs, field)
                            .nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            assertTrue("Field " + field + " should not have docValues", DocValues
                    .getNumeric(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            assertTrue("Field " + field + " should not have docValues", DocValues
                    .getSorted(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            assertTrue("Field " + field + " should not have docValues", DocValues
                    .getBinary(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        } else {
            if (sf.hasDocValues()) {
                if (sf.multiValued()) {
                    assertFalse("Field " + field + " should have docValues",
                            DocValues.getSortedNumeric(leafReaderForCheckingDVs, field)
                                    .nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                } else {
                    assertFalse("Field " + field + " should have docValues",
                            DocValues.getNumeric(leafReaderForCheckingDVs, field)
                                    .nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                }
            } else {
                expectThrows(IllegalStateException.class,
                        () -> DocValues.getSortedNumeric(leafReaderForCheckingDVs, field));
                expectThrows(IllegalStateException.class,
                        () -> DocValues.getNumeric(leafReaderForCheckingDVs, field));
            }
            expectThrows(IllegalStateException.class,
                    () -> DocValues.getSorted(leafReaderForCheckingDVs, field));
            expectThrows(IllegalStateException.class,
                    () -> DocValues.getBinary(leafReaderForCheckingDVs, field));
        }
        for (LeafReaderContext leave : ir.leaves()) {
            LeafReader reader = leave.reader();
            for (int i = 0; i < reader.numDocs(); i++) {
                Document doc = reader.document(i);
                if (sf.stored()) {
                    assertNotNull("Field " + field + " not found. Doc: " + doc, doc.get(field));
                } else {
                    assertNull(doc.get(field));
                }
            }
        }
    } finally {
        ref.decref();
    }
    clearIndex();
    assertU(commit());
}

From source file: org.apache.solr.search.BitDocSet.java

License: Apache License

/*** DocIterator using nextSetBit()
public DocIterator iterator() {
    return new DocIterator() {
        int pos=bits.nextSetBit(0);
        public boolean hasNext() {
            return pos>=0;
        }

        public Integer next() {
            return nextDoc();
        }

        public void remove() {
            bits.clear(pos);
        }

        public int nextDoc() {
            int old=pos;
            pos=bits.nextSetBit(old+1);
            return old;
        }

        public float score() {
            return 0.0f;
        }
    };
}
***/

@Override
public DocIterator iterator() {
    return new DocIterator() {
        private final OpenBitSetIterator iter = new OpenBitSetIterator(bits);
        private int pos = iter.nextDoc();

        @Override
        public boolean hasNext() {
            return pos != DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public Integer next() {
            return nextDoc();
        }

        @Override
        public void remove() {
            bits.clear(pos);
        }

        @Override
        public int nextDoc() {
            int old = pos;
            pos = iter.nextDoc();
            return old;
        }

        @Override
        public float score() {
            return 0.0f;
        }
    };
}

From source file: org.apache.solr.search.CitationLRUCache.java

License: Apache License

private void unInvertedTheDamnThing(AtomicReader reader, Map<String, List<String>> fields, Bits liveDocs,
        KVSetter setter) throws IOException {

    if (liveDocs == null) {
        liveDocs = reader.getLiveDocs();
    }

    int docBase = reader.getContext().docBase;
    //System.out.println("***REBUILDING***");
    //System.out.println("Generating mapping from: " + reader.toString() + " docBase=" + docBase);

    // load multiple values->idlucene mapping
    for (String idField : fields.get("intFieldsMV")) {
        DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
        TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
        if (termsEnum == null) {
            continue;
        }
        DocsEnum docs = null;
        for (;;) {
            BytesRef term = termsEnum.next();
            if (term == null)
                break;

            Integer t = FieldCache.DEFAULT_INT_PARSER.parseInt(term);

            docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
            int i = 0;
            for (;;) {
                int d = docs.nextDoc();
                if (d == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }

                setter.set(docBase, d, treatIdentifiersAsText ? Integer.toString(t) : t);

                i += 1;
                //if (i > 1) {
                //   log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
                //}
            }
        }
    }

    /*
     * Read every term
     *    - for each term get all live documents
     *       - and do something with the pair: (docid, term)
     */
    for (String idField : fields.get("textFieldsMV")) {
        DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
        TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
        if (termsEnum == null) {
            continue;
        }
        DocsEnum docs = null;
        for (;;) {
            BytesRef term = termsEnum.next();
            if (term == null)
                break;
            String t = term.utf8ToString();

            docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
            for (;;) {
                int d = docs.nextDoc();
                if (d == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }

                setter.set(docBase, d, t);

                //if (i > 1) {
                //   log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
                //}
            }
        }
    }

    // load single valued ids 
    for (String idField : fields.get("textFields")) {
        BinaryDocValues idMapping = getCacheReuseExisting(reader, idField);

        Integer i = 0;
        BytesRef ret = new BytesRef();
        while (i < reader.maxDoc()) {
            if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
                //System.out.println("skipping: " + i);
                i++;
                continue;
            }
            idMapping.get(i, ret);
            if (ret.length > 0) {
                setter.set(docBase, i, ret.utf8ToString()); // in this case, docbase will always be 0
            }
            i++;
        }
        if (purgeCache)
            FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
    }
    for (String idField : fields.get("intFields")) {
        Ints idMapping = FieldCache.DEFAULT.getInts(reader, idField, false);
        Integer i = 0;
        while (i < reader.maxDoc()) {
            if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
                //System.out.println("skipping: " + i);
                i++;
                continue;
            }
            setter.set(docBase, i,
                    treatIdentifiersAsText ? Integer.toString(idMapping.get(i)) : idMapping.get(i));
            i++;
        }
    }

}

From source file: org.apache.solr.search.DocSetBuilder.java

License: Apache License

public void add(DocIdSetIterator iter, int base) throws IOException {
    grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

    if (bitSet != null) {
        add(bitSet, iter, base);
    } else {
        while (true) {
            for (int i = pos; i < buffer.length; ++i) {
                final int doc = iter.nextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                    pos = i; // update pos
                    return;
                }
                buffer[i] = doc + base; // using the loop counter may help with removal of bounds checking
            }

            pos = buffer.length; // update pos
            if (pos + 1 >= threshold) {
                break;
            }

            growBuffer(pos + 1);
        }

        upgradeToBitSet();
        add(bitSet, iter, base);
    }
}