List of usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
public static final int NO_MORE_DOCS = Integer.MAX_VALUE, the sentinel that docID(), nextDoc(), and advance() return once an iterator is exhausted.
To view the source code for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS, click the Source link on each example.
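Before the Solr examples below, here is a minimal sketch of the iteration idiom they all rely on; the NoMoreDocsDemo class and its consume method are placeholders for illustration, not part of any of the sources listed.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

class NoMoreDocsDemo {
    // Minimal sketch: drain any DocIdSetIterator (from a DocIdSet, Scorer,
    // or PostingsEnum) until the NO_MORE_DOCS sentinel is returned.
    static void consume(DocIdSetIterator it) throws IOException {
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            // process doc here; doc IDs always arrive in increasing order
        }
        // after the loop, it.docID() is also NO_MORE_DOCS
    }
}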
From source file:org.apache.solr.search.FloatPayloadValueSource.java
License:Apache License
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    FunctionValues defaultValues = defaultValueSource.getValues(context, readerContext);

    // copied the bulk of this from TFValueSource - TODO: this is a very repeated pattern - base-class this advance logic stuff?
    return new FloatDocValues(this) {
        PostingsEnum docs;
        int atDoc;
        int lastDocRequested = -1;

        {
            reset();
        }

        public void reset() throws IOException {
            // no one should call us for deleted docs?
            if (terms != null) {
                final TermsEnum termsEnum = terms.iterator();
                if (termsEnum.seekExact(indexedBytes)) {
                    docs = termsEnum.postings(null, PostingsEnum.ALL);
                } else {
                    docs = null;
                }
            } else {
                docs = null;
            }

            if (docs == null) {
                // dummy PostingsEnum so floatVal() can work
                // when would this be called? if field/val did not match? this is called for every doc? create once and cache?
                docs = new PostingsEnum() {
                    @Override
                    public int freq() {
                        return 0;
                    }

                    @Override
                    public int nextPosition() throws IOException {
                        return -1;
                    }

                    @Override
                    public int startOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public int endOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public BytesRef getPayload() throws IOException {
                        return null;
                    }

                    @Override
                    public int docID() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int nextDoc() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int advance(int target) {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public long cost() {
                        return 0;
                    }
                };
            }
            atDoc = -1;
        }

        @Override
        public float floatVal(int doc) {
            try {
                if (doc < lastDocRequested) {
                    // out-of-order access.... reset
                    reset();
                }
                lastDocRequested = doc;

                if (atDoc < doc) {
                    atDoc = docs.advance(doc);
                }

                if (atDoc > doc) {
                    // term doesn't match this document... either because we hit the
                    // end, or because the next doc is after this doc.
                    return defaultValues.floatVal(doc);
                }

                // a match!
                int freq = docs.freq();
                int numPayloadsSeen = 0;
                float currentScore = 0;
                for (int i = 0; i < freq; i++) {
                    docs.nextPosition();
                    BytesRef payload = docs.getPayload();
                    if (payload != null) {
                        float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload);

                        // payloadFunction = null represents "first"
                        if (payloadFunction == null) return payloadVal;

                        currentScore = payloadFunction.currentScore(doc, indexedField, docs.startOffset(),
                                docs.endOffset(), numPayloadsSeen, currentScore, payloadVal);
                        numPayloadsSeen++;
                    }
                }

                return (numPayloadsSeen > 0)
                        ? payloadFunction.docScore(doc, indexedField, numPayloadsSeen, currentScore)
                        : defaultValues.floatVal(doc);
            } catch (IOException e) {
                throw new RuntimeException("caught exception in function " + description() + " : doc=" + doc, e);
            }
        }
    };
}
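A side note on the dummy-enum pattern above: when only a plain DocIdSetIterator is needed (rather than a full PostingsEnum), recent Lucene versions ship an always-exhausted iterator, so hand-rolling one is unnecessary. A minimal sketch:

import org.apache.lucene.search.DocIdSetIterator;

class EmptyIteratorDemo {
    public static void main(String[] args) throws Exception {
        // An already-exhausted iterator: nextDoc()/advance() return NO_MORE_DOCS immediately.
        DocIdSetIterator it = DocIdSetIterator.empty();
        System.out.println(it.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); // prints true
    }
}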
From source file:org.apache.solr.search.function.FileFloatSource.java
License:Apache License
private static float[] getFloats(FileFloatSource ffs, IndexReader reader) {
    float[] vals = new float[reader.maxDoc()];
    if (ffs.defVal != 0) {
        Arrays.fill(vals, ffs.defVal);
    }
    InputStream is;
    String fname = "external_" + ffs.field.getName();
    try {
        is = VersionedFile.getLatestFile(ffs.dataDir, fname);
    } catch (IOException e) {
        // log, use defaults
        SolrCore.log.error("Error opening external value source file: " + e);
        return vals;
    }

    BufferedReader r = new BufferedReader(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));

    String idName = ffs.keyField.getName();
    FieldType idType = ffs.keyField.getType();

    // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
    // because of this, simply ask the reader for a new termEnum rather than
    // trying to use skipTo()

    List<String> notFound = new ArrayList<String>();
    int notFoundCount = 0;
    int otherErrors = 0;

    char delimiter = '=';

    BytesRef internalKey = new BytesRef();

    try {
        TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator(null);
        DocsEnum docsEnum = null;

        // removing deleted docs shouldn't matter
        // final Bits liveDocs = MultiFields.getLiveDocs(reader);

        for (String line; (line = r.readLine()) != null;) {
            int delimIndex = line.lastIndexOf(delimiter);
            if (delimIndex < 0) continue;

            int endIndex = line.length();
            String key = line.substring(0, delimIndex);
            String val = line.substring(delimIndex + 1, endIndex);

            float fval;
            try {
                idType.readableToIndexed(key, internalKey);
                fval = Float.parseFloat(val);
            } catch (Exception e) {
                if (++otherErrors <= 10) {
                    SolrCore.log.error("Error loading external value source " + fname + ": " + e
                            + (otherErrors < 10 ? "" : "\tSkipping future errors for this file."));
                }
                continue; // go to next line in file.. leave values as default.
            }

            if (!termsEnum.seekExact(internalKey)) {
                if (notFoundCount < 10) { // collect first 10 not found for logging
                    notFound.add(key);
                }
                notFoundCount++;
                continue;
            }

            docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
            int doc;
            while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                vals[doc] = fval;
            }
        }
    } catch (IOException e) {
        // log, use defaults
        SolrCore.log.error("Error loading external value source: " + e);
    } finally {
        // swallow exceptions on close so we don't override any
        // exceptions that happened in the loop
        try {
            r.close();
        } catch (Exception e) {
        }
    }

    SolrCore.log.info("Loaded external value source " + fname
            + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound));

    return vals;
}
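For context, the external file parsed above is a plain list of key=value lines, one per unique document key; a hypothetical external_price_f file (the field name is illustrative) might look like:

doc1=1.5
doc2=0.25
doc9=42.0

Keys that match no indexed id are counted and logged as missing, and malformed lines leave the default value in place.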
From source file:org.apache.solr.search.function.TermFreqValueSource.java
License:Apache License
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader.fields();
    final Terms terms = fields.terms(field);

    return new IntDocValues(this) {
        DocsEnum docs;
        int atDoc;
        int lastDocRequested = -1;

        {
            reset();
        }

        public void reset() throws IOException {
            // no one should call us for deleted docs?
            docs = terms.docs(null, indexedBytes, null);
            if (docs == null) {
                docs = new DocsEnum() {
                    @Override
                    public int freq() {
                        return 0;
                    }

                    @Override
                    public int docID() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int nextDoc() throws IOException {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int advance(int target) throws IOException {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }
                };
            }
            atDoc = -1;
        }

        @Override
        public int intVal(int doc) {
            try {
                if (doc < lastDocRequested) {
                    // out-of-order access.... reset
                    reset();
                }
                lastDocRequested = doc;

                if (atDoc < doc) {
                    atDoc = docs.advance(doc);
                }

                if (atDoc > doc) {
                    // term doesn't match this document... either because we hit the
                    // end, or because the next doc is after this doc.
                    return 0;
                }

                // a match!
                return docs.freq();
            } catch (IOException e) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "caught exception in function " + description() + " : doc=" + doc, e);
            }
        }
    };
}
From source file:org.apache.solr.search.SolrIndexSearcher.java
License:Apache License
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file:org.apache.solr.search.SolrIndexSearcher.java
License:Apache License
/**
 * Lookup the docid by the unique key field, and return the id *within* the leaf reader
 * in the low 32 bits, and the index of the leaf reader in the high 32 bits.
 * -1 is returned if not found.
 *
 * @lucene.internal
 */
public long lookupId(BytesRef idBytes) throws IOException {
    String field = schema.getUniqueKeyField().getName();

    for (int i = 0, c = leafContexts.size(); i < c; i++) {
        final AtomicReaderContext leaf = leafContexts.get(i);
        final AtomicReader reader = leaf.reader();

        final Terms terms = reader.terms(field);
        if (terms == null) continue;

        TermsEnum te = terms.iterator(null);
        if (te.seekExact(idBytes)) {
            DocsEnum docs = te.docs(reader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
            int id = docs.nextDoc();
            if (id == DocIdSetIterator.NO_MORE_DOCS) continue;
            assert docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
            return (((long) i) << 32) | id;
        }
    }

    return -1;
}
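A short usage sketch for the packed return value above: the caller splits it back into a leaf index and a within-leaf doc id. The searcher and idBytes variables here are placeholders, not from the source.

// Sketch only: 'searcher' and 'idBytes' stand in for a real SolrIndexSearcher and key.
long packed = searcher.lookupId(idBytes);
if (packed != -1) {
    int leafIndex = (int) (packed >>> 32); // high 32 bits: which leaf reader
    int docInLeaf = (int) packed;          // low 32 bits: doc id within that leaf
}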
From source file:org.apache.solr.search.SolrIndexSearcher.java
License:Apache License
/** lucene.internal */
public DocSet getDocSet(DocsEnumState deState) throws IOException {
    int largestPossible = deState.termsEnum.docFreq();
    boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;

    TermQuery key = null;
    if (useCache) {
        key = new TermQuery(new Term(deState.fieldName, BytesRef.deepCopyOf(deState.termsEnum.term())));
        DocSet result = filterCache.get(key);
        if (result != null) return result;
    }

    int smallSetSize = maxDoc() >> 6;
    int scratchSize = Math.min(smallSetSize, largestPossible);
    if (deState.scratch == null || deState.scratch.length < scratchSize)
        deState.scratch = new int[scratchSize];

    final int[] docs = deState.scratch;
    int upto = 0;
    int bitsSet = 0;
    OpenBitSet obs = null;

    DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum, DocsEnum.FLAG_NONE);
    if (deState.docsEnum == null) {
        deState.docsEnum = docsEnum;
    }

    if (docsEnum instanceof MultiDocsEnum) {
        MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
        int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
        for (int subindex = 0; subindex < numSubs; subindex++) {
            MultiDocsEnum.EnumWithSlice sub = subs[subindex];
            if (sub.docsEnum == null) continue;
            int base = sub.slice.start;
            int docid;

            if (largestPossible > docs.length) {
                if (obs == null) obs = new OpenBitSet(maxDoc());
                while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    obs.fastSet(docid + base);
                    bitsSet++;
                }
            } else {
                while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    docs[upto++] = docid + base;
                }
            }
        }
    } else {
        int docid;
        if (largestPossible > docs.length) {
            if (obs == null) obs = new OpenBitSet(maxDoc());
            while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                obs.fastSet(docid);
                bitsSet++;
            }
        } else {
            while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                docs[upto++] = docid;
            }
        }
    }

    DocSet result;
    if (obs != null) {
        for (int i = 0; i < upto; i++) {
            obs.fastSet(docs[i]);
        }
        bitsSet += upto;
        result = new BitDocSet(obs, bitsSet);
    } else {
        result = upto == 0 ? DocSet.EMPTY : new SortedIntDocSet(Arrays.copyOf(docs, upto));
    }

    if (useCache) {
        filterCache.put(key, result);
    }

    return result;
}
From source file:org.apache.solr.search.TestDocSet.java
License:Apache License
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
    DocIdSetIterator ia = a.iterator();
    DocIdSetIterator ib = b.iterator();

    // test for next() equivalence
    for (;;) {
        int da = ia.nextDoc();
        int db = ib.nextDoc();
        assertEquals(da, db);
        assertEquals(ia.docID(), ib.docID());
        if (da == DocIdSetIterator.NO_MORE_DOCS) break;
    }

    for (int i = 0; i < 10; i++) {
        // test random skipTo() and next()
        ia = a.iterator();
        ib = b.iterator();
        int doc = -1;
        for (;;) {
            int da, db;
            if (rand.nextBoolean()) {
                da = ia.nextDoc();
                db = ib.nextDoc();
            } else {
                int target = doc + rand.nextInt(10) + 1; // keep in mind future edge cases like probing (increase if necessary)
                da = ia.advance(target);
                db = ib.advance(target);
            }

            assertEquals(da, db);
            assertEquals(ia.docID(), ib.docID());
            if (da == DocIdSetIterator.NO_MORE_DOCS) break;
            doc = da;
        }
    }
}
From source file:org.apache.solr.search.TestFilteredDocIdSet.java
License:Apache License
public void testFilteredDocIdSet() throws Exception {
    final int maxdoc = 10;
    final DocIdSet innerSet = new DocIdSet() {

        @Override
        public long ramBytesUsed() {
            return 0L;
        }

        @Override
        public DocIdSetIterator iterator() {
            return new DocIdSetIterator() {

                int docid = -1;

                @Override
                public int docID() {
                    return docid;
                }

                @Override
                public int nextDoc() {
                    docid++;
                    return docid < maxdoc ? docid : (docid = NO_MORE_DOCS);
                }

                @Override
                public int advance(int target) throws IOException {
                    return slowAdvance(target);
                }

                @Override
                public long cost() {
                    return 1;
                }
            };
        }
    };

    DocIdSet filteredSet = new FilteredDocIdSet(innerSet) {
        @Override
        protected boolean match(int docid) {
            return docid % 2 == 0; // validate only even docids
        }
    };

    DocIdSetIterator iter = filteredSet.iterator();
    ArrayList<Integer> list = new ArrayList<>();
    int doc = iter.advance(3);
    if (doc != DocIdSetIterator.NO_MORE_DOCS) {
        list.add(Integer.valueOf(doc));
        while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            list.add(Integer.valueOf(doc));
        }
    }

    int[] docs = new int[list.size()];
    int c = 0;
    Iterator<Integer> intIter = list.iterator();
    while (intIter.hasNext()) {
        docs[c++] = intIter.next().intValue();
    }
    int[] answer = new int[] { 4, 6, 8 };
    boolean same = Arrays.equals(answer, docs);
    if (!same) {
        System.out.println("answer: " + Arrays.toString(answer));
        System.out.println("gotten: " + Arrays.toString(docs));
        fail();
    }
}
From source file:org.apache.solr.search.TestRTGBase.java
License:Apache License
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file:org.apache.solr.uninverting.DocTermOrds.java
License:Apache License
/** Call this only once (if you subclass!) */
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    final long startTime = System.nanoTime();
    prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);

    final int maxDoc = reader.maxDoc();
    final int[] index = new int[maxDoc];       // immediate term numbers, or the index into the byte[] representing the last number
    final int[] lastTerm = new int[maxDoc];    // last term we saw for this document
    final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    final Terms terms = reader.terms(field);
    if (terms == null) {
        // No terms
        return;
    }

    final TermsEnum te = terms.iterator();
    final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
        // No terms match
        return;
    }

    // For our "term index wrapper"
    final List<BytesRef> indexedTerms = new ArrayList<>();
    final PagedBytes indexedTermsBytes = new PagedBytes(15);

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    byte[] tempArr = new byte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values.  This requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    postingsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (;;) {
        final BytesRef t = te.term();
        if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

        visitTerm(te, termNum);

        if ((termNum & indexIntervalMask) == 0) {
            // Index this term
            sizeOfIndexedStrings += t.length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.add(indexedTerm);
        }

        final int df = te.docFreq();
        if (df <= maxTermDocFreq) {

            postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (;;) {
                int doc = postingsEnum.nextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }
                //System.out.println("  chunk=" + chunk + " docs");

                actualDF++;
                termInstances++;

                //System.out.println("    docID=" + doc);
                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1) {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = val >>> 8;
                    int ilen = vIntSize(delta);
                    byte[] arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.length) {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
                        byte[] newarr = new byte[newLen];
                        System.arraycopy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = writeInt(delta, arr, pos);
                    index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                } else {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0) {
                        ipos = 0;
                    } else if ((val & 0x0000ff80) == 0) {
                        ipos = 1;
                    } else if ((val & 0x00ff8000) == 0) {
                        ipos = 2;
                    } else if ((val & 0xff800000) == 0) {
                        ipos = 3;
                    } else {
                        ipos = 4;
                    }

                    //System.out.println("      ipos=" + ipos);

                    int endPos = writeInt(delta, tempArr, ipos);
                    //System.out.println("      endpos=" + endPos);
                    if (endPos <= 4) {
                        //System.out.println("      fits!");
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++) {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    } else {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++) {
                            tempArr[j] = (byte) val;
                            val >>>= 8;
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new byte[12];
                    }
                }
            }
            setActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (te.next() == null) {
            break;
        }
    }

    numTermsInField = termNum;

    long midPoint = System.nanoTime();

    if (termInstances == 0) {
        // we didn't invert anything
        // lower memory consumption.
        tnums = null;
    } else {

        this.index = index;

        //
        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        //

        for (int pass = 0; pass < 256; pass++) {
            byte[] target = tnums[pass];
            int pos = 0; // end in target;
            if (target != null) {
                pos = target.length;
            } else {
                target = new byte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) {
                int lim = Math.min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++) {
                    //System.out.println("  pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1) {
                        int len = val >>> 8;
                        //System.out.println("    ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0) {
                            // we only have 24 bits for the array index
                            throw new IllegalStateException(
                                    "Too many values for UnInvertedField faceting on field " + field);
                        }
                        byte[] arr = bytes[doc];
                        /*
                        for(byte b : arr) {
                          //System.out.println("      b=" + Integer.toHexString((int) b));
                        }
                        */
                        bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                        if (target.length <= pos + len) {
                            int newlen = target.length;
                            /*** we don't have to worry about the array getting too large
                             * since the "pos" param will overflow first (only 24 bits available)
                            if ((newlen<<1) <= 0) {
                              // overflow...
                              newlen = Integer.MAX_VALUE;
                              if (newlen <= pos + len) {
                                throw new SolrException(400,"Too many terms to uninvert field!");
                              }
                            } else {
                              while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                            }
                            ****/
                            while (newlen <= pos + len) newlen <<= 1; // doubling strategy
                            byte[] newtarget = new byte[newlen];
                            System.arraycopy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        System.arraycopy(arr, 0, target, pos, len);
                        pos += len + 1; // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.length) {
                byte[] newtarget = new byte[pos];
                System.arraycopy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            tnums[pass] = target;

            if ((pass << 16) > maxDoc) break;
        }
    }
    indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);

    long endTime = System.nanoTime();

    total_time = (int) TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS);
    phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint - startTime, TimeUnit.NANOSECONDS);
}