List of usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS

public static final int NO_MORE_DOCS — the sentinel value (Integer.MAX_VALUE) returned by nextDoc() and advance() once a DocIdSetIterator is exhausted.
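Before the per-file examples, here is a minimal sketch of the common iteration pattern (the standalone class and the helper name countDocs are illustrative only, not taken from the sources below): nextDoc() returns document ids in increasing order and returns NO_MORE_DOCS when the iterator is exhausted, so consumers loop until the returned id equals the constant.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

class NoMoreDocsExample {
  /** Counts the documents an iterator matches; illustrative only. */
  static long countDocs(DocIdSetIterator disi) throws IOException {
    long count = 0;
    int doc;
    // nextDoc() returns segment-local doc ids in increasing order and
    // DocIdSetIterator.NO_MORE_DOCS (Integer.MAX_VALUE) when exhausted.
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    return count;
  }
}

Each example below embeds this same termination check in Solr faceting, stats, export, caching, and DocSet-building code.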
From source file:org.apache.solr.request.DocValuesStats.java
License:Apache License
/** accumulates per-segment single-valued stats */
static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
  int missingDocCount = 0;
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    int term = si.getOrd(doc);
    if (term >= 0) {
      if (map != null) {
        term = (int) ordMap.get(term);
      }
      counts[term]++;
      for (FieldFacetStats f : facetStats) {
        f.facetTermNum(docBase + doc, term);
      }
    } else {
      for (FieldFacetStats f : facetStats) {
        f.facetMissingNum(docBase + doc);
      }
      missingDocCount++;
    }
  }
  return missingDocCount;
}
From source file:org.apache.solr.request.DocValuesStats.java
License:Apache License
/** accumulates per-segment multi-valued stats */
static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
  int missingDocCount = 0;
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    si.setDocument(doc);
    long ord;
    boolean emptyTerm = true;
    while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      emptyTerm = false;
      int term = (int) ord;
      if (map != null) {
        term = (int) ordMap.get(term);
      }
      counts[term]++;
      for (FieldFacetStats f : facetStats) {
        f.facetTermNum(docBase + doc, term);
      }
    }
    if (emptyTerm) {
      for (FieldFacetStats f : facetStats) {
        f.facetMissingNum(docBase + doc);
      }
      missingDocCount++;
    }
  }
  return missingDocCount;
}
From source file:org.apache.solr.request.IntervalFacets.java
License:Apache License
private void accumIntervalsMulti(SortedSetDocValues ssdv, DocIdSetIterator disi, Bits bits) throws IOException {
  // First update the ordinals in the intervals for this segment
  for (FacetInterval interval : intervals) {
    interval.updateContext(ssdv);
  }

  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (bits != null && bits.get(doc) == false) {
      continue;
    }
    ssdv.setDocument(doc);
    long currOrd;
    int currentInterval = 0;
    while ((currOrd = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      boolean evaluateNextInterval = true;
      while (evaluateNextInterval && currentInterval < intervals.length) {
        IntervalCompareResult result = intervals[currentInterval].includes(currOrd);
        switch (result) {
        case INCLUDED:
          /*
           * Increment the current interval and move to the next one using
           * the same value
           */
          intervals[currentInterval].incCount();
          currentInterval++;
          break;
        case LOWER_THAN_START:
          /*
           * None of the next intervals will match this value (all of them have
           * higher start value). Move to the next value for this document.
           */
          evaluateNextInterval = false;
          break;
        case GREATER_THAN_END:
          /*
           * Next interval may match this value
           */
          currentInterval++;
          break;
        }
      }
    }
  }
}
From source file:org.apache.solr.request.IntervalFacets.java
License:Apache License
private void accumIntervalsSingle(SortedDocValues sdv, DocIdSetIterator disi, Bits bits) throws IOException {
  // First update the ordinals in the intervals to this segment
  for (FacetInterval interval : intervals) {
    interval.updateContext(sdv);
  }
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (bits != null && bits.get(doc) == false) {
      continue;
    }
    int ord = sdv.getOrd(doc);
    if (ord >= 0) {
      accumInterval(ord);
    }
  }
}
From source file:org.apache.solr.request.SimpleFacets.java
License:Apache License
/**
 * Returns a list of terms in the specified field along with the
 * corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code>
 * and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset,
    int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {

  /* :TODO: potential optimization...
   * cache the Terms with the highest docFreq and try them first
   * don't enum if we get our max from them
   */

  // Minimum term docFreq in order to use the filterCache for that term.
  int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

  // make sure we have a set that is fast for random access, if we will use it for that
  DocSet fastForRandomSet = docs;
  if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
    SortedIntDocSet sset = (SortedIntDocSet) docs;
    fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
  }

  IndexSchema schema = searcher.getSchema();
  AtomicReader r = searcher.getAtomicReader();
  FieldType ft = schema.getFieldType(field);

  boolean sortByCount = sort.equals("count") || sort.equals("true");
  final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
  final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount
      ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize)
      : null;
  final NamedList<Integer> res = new NamedList<Integer>();

  int min = mincount - 1; // the smallest value in the top 'N' values
  int off = offset;
  int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

  BytesRef startTermBytes = null;
  if (prefix != null) {
    String indexedPrefix = ft.toInternal(prefix);
    startTermBytes = new BytesRef(indexedPrefix);
  }

  Fields fields = r.fields();
  Terms terms = fields == null ? null : fields.terms(field);
  TermsEnum termsEnum = null;
  SolrIndexSearcher.DocsEnumState deState = null;
  BytesRef term = null;
  if (terms != null) {
    termsEnum = terms.iterator(null);

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (startTermBytes != null) {
      if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  DocsEnum docsEnum = null;
  CharsRef charsRef = new CharsRef(10);

  if (docs.size() >= mincount) {
    while (term != null) {

      if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes))
        break;

      int df = termsEnum.docFreq();

      // If we are sorting, we can use df>min (rather than >=) since we
      // are going in index order. For certain term distributions this can
      // make a large difference (for example, many terms with df=1).
      if (df > 0 && df > min) {
        int c;

        if (df >= minDfFilterCache) {
          // use the filter cache

          if (deState == null) {
            deState = new SolrIndexSearcher.DocsEnumState();
            deState.fieldName = field;
            deState.liveDocs = r.getLiveDocs();
            deState.termsEnum = termsEnum;
            deState.docsEnum = docsEnum;
          }

          c = searcher.numDocs(docs, deState);

          docsEnum = deState.docsEnum;
        } else {
          // iterate over TermDocs to calculate the intersection

          // TODO: specialize when base docset is a bitset or hash set (skipDocs)?  or does it matter for this?
          // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
          // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
          docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
          c = 0;

          if (docsEnum instanceof MultiDocsEnum) {
            MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
            int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
            for (int subindex = 0; subindex < numSubs; subindex++) {
              MultiDocsEnum.EnumWithSlice sub = subs[subindex];
              if (sub.docsEnum == null) continue;
              int base = sub.slice.start;
              int docid;
              while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (fastForRandomSet.exists(docid + base)) c++;
              }
            }
          } else {
            int docid;
            while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              if (fastForRandomSet.exists(docid)) c++;
            }
          }
        }

        if (sortByCount) {
          if (c > min) {
            BytesRef termCopy = BytesRef.deepCopyOf(term);
            queue.add(new CountPair<BytesRef, Integer>(termCopy, c));
            if (queue.size() >= maxsize) min = queue.last().val;
          }
        } else {
          if (c >= mincount && --off < 0) {
            if (--lim < 0) break;
            ft.indexedToReadable(term, charsRef);
            res.add(charsRef.toString(), c);
          }
        }
      }

      term = termsEnum.next();
    }
  }

  if (sortByCount) {
    for (CountPair<BytesRef, Integer> p : queue) {
      if (--off >= 0) continue;
      if (--lim < 0) break;
      ft.indexedToReadable(p.key, charsRef);
      res.add(charsRef.toString(), p.val);
    }
  }

  if (missing) {
    res.add(null, getFieldMissingCount(searcher, docs, field));
  }

  return res;
}
From source file:org.apache.solr.response.SortingResponseWriter.java
License:Apache License
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
  Exception e1 = res.getException();
  if (e1 != null) {
    e1.printStackTrace(new PrintWriter(writer));
    return;
  }

  SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
  SortSpec sortSpec = info.getResponseBuilder().getSortSpec();

  if (sortSpec == null) {
    throw new IOException(new SyntaxError("No sort criteria was provided."));
  }

  SolrIndexSearcher searcher = req.getSearcher();
  Sort sort = searcher.weightSort(sortSpec.getSort());

  if (sort == null) {
    throw new IOException(new SyntaxError("No sort criteria was provided."));
  }

  if (sort.needsScores()) {
    throw new IOException(new SyntaxError("Scoring is not currently supported with xsort."));
  }

  FixedBitSet[] sets = (FixedBitSet[]) req.getContext().get("export");
  Integer th = (Integer) req.getContext().get("totalHits");

  if (sets == null) {
    throw new IOException(new SyntaxError("xport RankQuery is required for xsort: rq={!xport}"));
  }

  int totalHits = th.intValue();
  SolrParams params = req.getParams();
  String fl = params.get("fl");

  if (fl == null) {
    throw new IOException(new SyntaxError("export field list (fl) must be specified."));
  }

  String[] fields = fl.split(",");

  for (int i = 0; i < fields.length; i++) {
    if (fields[i].trim().equals("score")) {
      throw new IOException(new SyntaxError("Scoring is not currently supported with xsort."));
    }
  }

  FieldWriter[] fieldWriters = getFieldWriters(fields, req.getSearcher());
  writer.write("{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":" + totalHits + ", \"docs\":[");

  // Write the data.
  List<AtomicReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
  SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
  int count = 0;
  int queueSize = 30000;
  SortQueue queue = new SortQueue(queueSize, sortDoc);
  SortDoc[] outDocs = new SortDoc[queueSize];

  boolean commaNeeded = false;
  while (count < totalHits) {
    //long begin = System.nanoTime();
    queue.reset();
    SortDoc top = queue.top();
    for (int i = 0; i < leaves.size(); i++) {
      sortDoc.setNextReader(leaves.get(i));
      DocIdSetIterator it = sets[i].iterator();
      int docId = -1;
      while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        sortDoc.setValues(docId);
        if (top.lessThan(sortDoc)) {
          top.setValues(sortDoc);
          top = queue.updateTop();
        }
      }
    }

    int outDocsIndex = -1;

    for (int i = 0; i < queueSize; i++) {
      SortDoc s = queue.pop();
      if (s.docId > -1) {
        outDocs[++outDocsIndex] = s;
      }
    }

    //long end = System.nanoTime();

    count += (outDocsIndex + 1);

    try {
      for (int i = outDocsIndex; i >= 0; --i) {
        SortDoc s = outDocs[i];
        if (commaNeeded) {
          writer.write(',');
        }
        writer.write('{');
        writeDoc(s, leaves, fieldWriters, sets, writer);
        writer.write('}');
        commaNeeded = true;
        s.reset();
      }
    } catch (Throwable e) {
      Throwable ex = e;
      while (ex != null) {
        String m = ex.getMessage();
        if (m != null && m.contains("Broken pipe")) {
          logger.info("Early client disconnect during export");
          return;
        }
        ex = ex.getCause();
      }

      if (e instanceof IOException) {
        throw ((IOException) e);
      } else {
        throw new IOException(e);
      }
    }
  }

  //System.out.println("Sort Time 2:"+Long.toString(total/1000000));
  writer.write("]}}");
  writer.flush();
}
From source file:org.apache.solr.schema.TestPointFields.java
License:Apache License
private void doTestInternals(String field, String[] values) throws IOException {
  assertTrue(h.getCore().getLatestSchema().getField(field).getType() instanceof PointField);
  for (int i = 0; i < 10; i++) {
    assertU(adoc("id", String.valueOf(i), field, values[i]));
  }
  assertU(commit());
  IndexReader ir;
  RefCounted<SolrIndexSearcher> ref = null;
  SchemaField sf = h.getCore().getLatestSchema().getField(field);
  boolean ignoredField = !(sf.indexed() || sf.stored() || sf.hasDocValues());
  try {
    ref = h.getCore().getSearcher();
    SolrIndexSearcher searcher = ref.get();
    ir = searcher.getIndexReader();
    // our own SlowCompositeReader to check DocValues on disk w/o the UninvertingReader added by SolrIndexSearcher
    final LeafReader leafReaderForCheckingDVs = SlowCompositeReaderWrapper.wrap(searcher.getRawReader());

    if (sf.indexed()) {
      assertEquals("Field " + field + " should have point values", 10, PointValues.size(ir, field));
    } else {
      assertEquals("Field " + field + " should have no point values", 0, PointValues.size(ir, field));
    }

    if (ignoredField) {
      assertTrue("Field " + field + " should not have docValues",
          DocValues.getSortedNumeric(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
      assertTrue("Field " + field + " should not have docValues",
          DocValues.getNumeric(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
      assertTrue("Field " + field + " should not have docValues",
          DocValues.getSorted(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
      assertTrue("Field " + field + " should not have docValues",
          DocValues.getBinary(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    } else {
      if (sf.hasDocValues()) {
        if (sf.multiValued()) {
          assertFalse("Field " + field + " should have docValues",
              DocValues.getSortedNumeric(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        } else {
          assertFalse("Field " + field + " should have docValues",
              DocValues.getNumeric(leafReaderForCheckingDVs, field).nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        }
      } else {
        expectThrows(IllegalStateException.class, () -> DocValues.getSortedNumeric(leafReaderForCheckingDVs, field));
        expectThrows(IllegalStateException.class, () -> DocValues.getNumeric(leafReaderForCheckingDVs, field));
      }

      expectThrows(IllegalStateException.class, () -> DocValues.getSorted(leafReaderForCheckingDVs, field));
      expectThrows(IllegalStateException.class, () -> DocValues.getBinary(leafReaderForCheckingDVs, field));
    }

    for (LeafReaderContext leave : ir.leaves()) {
      LeafReader reader = leave.reader();
      for (int i = 0; i < reader.numDocs(); i++) {
        Document doc = reader.document(i);
        if (sf.stored()) {
          assertNotNull("Field " + field + " not found. Doc: " + doc, doc.get(field));
        } else {
          assertNull(doc.get(field));
        }
      }
    }
  } finally {
    ref.decref();
  }
  clearIndex();
  assertU(commit());
}
From source file:org.apache.solr.search.BitDocSet.java
License:Apache License
/*** DocIterator using nextSetBit()
public DocIterator iterator() {
  return new DocIterator() {
    int pos=bits.nextSetBit(0);
    public boolean hasNext() { return pos>=0; }
    public Integer next() { return nextDoc(); }
    public void remove() { bits.clear(pos); }
    public int nextDoc() {
      int old=pos;
      pos=bits.nextSetBit(old+1);
      return old;
    }
    public float score() { return 0.0f; }
  };
}
***/

@Override
public DocIterator iterator() {
  return new DocIterator() {
    private final OpenBitSetIterator iter = new OpenBitSetIterator(bits);
    private int pos = iter.nextDoc();

    @Override
    public boolean hasNext() {
      return pos != DocIdSetIterator.NO_MORE_DOCS;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public void remove() {
      bits.clear(pos);
    }

    @Override
    public int nextDoc() {
      int old = pos;
      pos = iter.nextDoc();
      return old;
    }

    @Override
    public float score() {
      return 0.0f;
    }
  };
}
From source file:org.apache.solr.search.CitationLRUCache.java
License:Apache License
private void unInvertedTheDamnThing(AtomicReader reader, Map<String, List<String>> fields, Bits liveDocs,
    KVSetter setter) throws IOException {

  if (liveDocs == null) {
    liveDocs = reader.getLiveDocs();
  }

  int docBase = reader.getContext().docBase;
  //System.out.println("***REBUILDING***");
  //System.out.println("Generating mapping from: " + reader.toString() + " docBase=" + docBase);

  // load multiple values->idlucene mapping
  for (String idField : fields.get("intFieldsMV")) {
    DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
    TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
    if (termsEnum == null) {
      continue;
    }
    DocsEnum docs = null;
    for (;;) {
      BytesRef term = termsEnum.next();
      if (term == null)
        break;

      Integer t = FieldCache.DEFAULT_INT_PARSER.parseInt(term);

      docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
      int i = 0;
      for (;;) {
        int d = docs.nextDoc();
        if (d == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        setter.set(docBase, d, treatIdentifiersAsText ? Integer.toString(t) : t);
        i += 1;
        //if (i > 1) {
        //  log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
        //}
      }
    }
  }

  /*
   * Read every term
   *  - for each term get all live documents
   *  - and do something with the pair: (docid, term)
   */
  for (String idField : fields.get("textFieldsMV")) {
    DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
    TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
    if (termsEnum == null) {
      continue;
    }
    DocsEnum docs = null;
    for (;;) {
      BytesRef term = termsEnum.next();
      if (term == null)
        break;

      String t = term.utf8ToString();

      docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
      for (;;) {
        int d = docs.nextDoc();
        if (d == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        setter.set(docBase, d, t);
        //if (i > 1) {
        //  log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
        //}
      }
    }
  }

  // load single valued ids
  for (String idField : fields.get("textFields")) {
    BinaryDocValues idMapping = getCacheReuseExisting(reader, idField);
    Integer i = 0;
    BytesRef ret = new BytesRef();
    while (i < reader.maxDoc()) {
      if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
        //System.out.println("skipping: " + i);
        i++;
        continue;
      }
      idMapping.get(i, ret);
      if (ret.length > 0) {
        setter.set(docBase, i, ret.utf8ToString()); // in this case, docbase will always be 0
      }
      i++;
    }
    if (purgeCache)
      FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
  }

  for (String idField : fields.get("intFields")) {
    Ints idMapping = FieldCache.DEFAULT.getInts(reader, idField, false);
    Integer i = 0;
    while (i < reader.maxDoc()) {
      if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
        //System.out.println("skipping: " + i);
        i++;
        continue;
      }
      setter.set(docBase, i, treatIdentifiersAsText ? Integer.toString(idMapping.get(i)) : idMapping.get(i));
      i++;
    }
  }
}
From source file:org.apache.solr.search.DocSetBuilder.java
License:Apache License
public void add(DocIdSetIterator iter, int base) throws IOException {
  grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

  if (bitSet != null) {
    add(bitSet, iter, base);
  } else {
    while (true) {
      for (int i = pos; i < buffer.length; ++i) {
        final int doc = iter.nextDoc();
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
          pos = i; // update pos
          return;
        }
        buffer[i] = doc + base; // using the loop counter may help with removal of bounds checking
      }
      pos = buffer.length; // update pos
      if (pos + 1 >= threshold) {
        break;
      }
      growBuffer(pos + 1);
    }
    upgradeToBitSet();
    add(bitSet, iter, base);
  }
}