Usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
public static final int NO_MORE_DOCS: the sentinel value (Integer.MAX_VALUE) returned by nextDoc() and advance() once the iterator is exhausted.
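All of the examples below follow the same basic idiom: step the iterator with nextDoc() (or advance()) and stop when it returns NO_MORE_DOCS. A minimal sketch of that idiom, assuming you already have some DocIdSetIterator instance (the method name is illustrative, not from any of the listed sources):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Minimal sketch: exhaust a DocIdSetIterator, visiting every doc id it produces.
static void consume(DocIdSetIterator iterator) throws IOException {
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        // process doc here
    }
}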
From source file:org.apache.solr.util.TestOpenBitSet.java
License:Apache License
void doIterate(BitSet a, OpenBitSet b) {
    int aa = -1, bb = -1;
    OpenBitSetIterator iterator = new OpenBitSetIterator(b);
    do {
        aa = a.nextSetBit(aa + 1);
        if (rand.nextBoolean()) {
            iterator.nextDoc();
            bb = iterator.docID();
        } else {
            iterator.advance(bb + 1);
            bb = iterator.docID();
        }
        assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
    } while (aa >= 0);
}
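The test above drives the iterator with both nextDoc() and advance(). A minimal sketch of the advance() idiom in isolation, with an illustrative method name and parameters that are not taken from the test: advance(target) positions the iterator on the first doc id greater than or equal to target, or returns NO_MORE_DOCS when none exists.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Sketch: skip directly to each target doc id instead of stepping one doc at a time.
static void checkTargets(DocIdSetIterator iterator, int[] sortedTargets) throws IOException {
    for (int target : sortedTargets) {
        int doc = iterator.docID();
        if (doc < target) {
            doc = iterator.advance(target); // first doc >= target
        }
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            break; // iterator exhausted
        }
        // doc is now the first id at or after target
    }
}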
From source file:org.archive.porky.FindAndIntersectionsUsingPForDeltaDocIdSetUDF.java
License:Apache License
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0)
        return null;
    DataBag bagOfBags = (DataBag) input.get(0);
    DocSet pForDeltaDocSet = null;
    ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
    try {
        for (Tuple t : bagOfBags) {
            DataBag bag = (DataBag) t.get(0);
            pForDeltaDocSet = DocSetFactory.getPForDeltaDocSetInstance();
            for (Tuple tup : bag) {
                if (tup != null && tup.size() == 1) {
                    pForDeltaDocSet.addDoc((Integer) tup.get(0));
                }
            }
            docs.add(pForDeltaDocSet);
        }
        ArrayList<Integer> intersectedIds = new ArrayList<Integer>();
        AndDocIdSet andSet = new AndDocIdSet(docs);
        DocIdSetIterator iter = andSet.iterator();
        int docId = iter.nextDoc();
        while (docId != DocIdSetIterator.NO_MORE_DOCS) {
            intersectedIds.add(docId);
            docId = iter.nextDoc();
        }
        // return bag of intersected IDs
        DataBag resultBag = new NonSpillableDataBag(intersectedIds.size());
        for (int Id : intersectedIds) {
            Tuple newTuple = TupleFactory.getInstance().newTuple(1);
            newTuple.set(0, new Integer(Id));
            resultBag.add(newTuple);
        }
        return resultBag;
    } catch (Exception e) {
        throw WrappedIOException.wrap("Caught exception processing input row ", e);
    }
}
From source file:org.codelibs.elasticsearch.common.lucene.index.FilterableTermsEnum.java
License:Apache License
@Override
public boolean seekExact(BytesRef text) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;
    for (Holder anEnum : enums) {
        if (anEnum.termsEnum.seekExact(text)) {
            if (anEnum.bits == null) {
                docFreq += anEnum.termsEnum.docFreq();
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
                    if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
                        totalTermFreq = -1;
                        continue;
                    }
                    totalTermFreq += leafTotalTermFreq;
                }
            } else {
                final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag);
                // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        docFreq++;
                        // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
                        // is really 1 or unrecorded when filtering like this
                        totalTermFreq += docsEnum.freq();
                    }
                } else {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
                        docFreq++;
                    }
                }
            }
        }
    }
    if (docFreq > 0) {
        currentDocFreq = docFreq;
        currentTotalTermFreq = totalTermFreq;
        current = text;
        return true;
    } else {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
}
From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java
License:Apache License
/**
 * Check whether there is one or more documents matching the provided query.
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // the scorer API should be more efficient at stopping after the first
    // match than the bulk scorer API
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(context);
        if (scorer == null) {
            continue;
        }
        final Bits liveDocs = context.reader().getLiveDocs();
        final DocIdSetIterator iterator = scorer.iterator();
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            if (liveDocs == null || liveDocs.get(doc)) {
                return true;
            }
        }
    }
    return false;
}
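A possible call site for the exists(...) helper above; the field name and term value are placeholders, not taken from the source file, and "searcher" is assumed to be an already-open IndexSearcher.

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

// Illustrative call: returns true as soon as any live doc matches the term query.
static boolean hasPublishedDocs(IndexSearcher searcher) throws IOException {
    return Lucene.exists(searcher, new TermQuery(new Term("status", "published")));
}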
From source file:org.codelibs.elasticsearch.common.lucene.search.XMoreLikeThis.java
License:Apache License
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName)
        throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        if (isSkipTerm(fieldName, term)) {
            continue;
        }
        final PostingsEnum docs = termsEnum.postings(null);
        int freq = 0;
        while (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            freq += docs.freq();
        }
        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
From source file:org.codelibs.elasticsearch.common.lucene.uid.PerThreadIDAndVersionLookup.java
License:Apache License
/** Return null if id is not found. */
public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException {
    if (termsEnum.seekExact(id)) {
        // there may be more than one matching docID, in the case of nested docs, so we want the last one:
        docsEnum = termsEnum.postings(docsEnum, 0);
        int docID = DocIdSetIterator.NO_MORE_DOCS;
        for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
            if (liveDocs != null && liveDocs.get(d) == false) {
                continue;
            }
            docID = d;
        }
        if (docID != DocIdSetIterator.NO_MORE_DOCS) {
            return new DocIdAndVersion(docID, versions.get(docID), context);
        }
    }
    return null;
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.nested.ReverseNestedAggregator.java
License:Apache License
@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    // In ES if parent is deleted, then also the children are deleted, so the child docs this agg receives
    // must belong to parent docs that is alive. For this reason acceptedDocs can be null here.
    final BitSet parentDocs = parentBitsetProducer.getBitSet(ctx);
    if (parentDocs == null) {
        return LeafBucketCollector.NO_OP_COLLECTOR;
    }
    final LongIntHashMap bucketOrdToLastCollectedParentDoc = new LongIntHashMap(32);
    return new LeafBucketCollectorBase(sub, null) {
        @Override
        public void collect(int childDoc, long bucket) throws IOException {
            // fast forward to retrieve the parentDoc this childDoc belongs to
            final int parentDoc = parentDocs.nextSetBit(childDoc);
            assert childDoc <= parentDoc && parentDoc != DocIdSetIterator.NO_MORE_DOCS;
            int keySlot = bucketOrdToLastCollectedParentDoc.indexOf(bucket);
            if (bucketOrdToLastCollectedParentDoc.indexExists(keySlot)) {
                int lastCollectedParentDoc = bucketOrdToLastCollectedParentDoc.indexGet(keySlot);
                if (parentDoc > lastCollectedParentDoc) {
                    collectBucket(sub, parentDoc, bucket);
                    bucketOrdToLastCollectedParentDoc.indexReplace(keySlot, parentDoc);
                }
            } else {
                collectBucket(sub, parentDoc, bucket);
                bucketOrdToLastCollectedParentDoc.indexInsert(keySlot, bucket, parentDoc);
            }
        }
    };
}
From source file:org.cosmo.common.model.PublicFolder.java
License:Apache License
public static long[] getAllItemRecordIds(org.cosmo.common.xml.XML rootFolder) {
    try {
        // track duplicates - faster than HashSet
        OpenBitSet recordIdSet = getAllItemRecordIdSet(rootFolder);
        LongArrayList recordIds = new LongArrayList((int) recordIdSet.cardinality());
        DocIdSetIterator recordIdIterator = recordIdSet.iterator();
        for (int docId = recordIdIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
            recordIds.add((long) docId);
            docId = recordIdIterator.nextDoc();
        }
        return recordIds.elements();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
From source file:org.cosmo.common.record.SearchResult.java
License:Apache License
public long[] records(boolean load) throws Exception {
    if (Log.jlucene.getLevel() == java.util.logging.Level.FINE) {
        Log.jlucene.fine(Fmt.S("%s Start Fetching Records", Clock.timer().markAndCheckRunning()));
    }
    DocIdSetIterator resultsIterator = _result.iterator();
    Search search = _context._searchField._declaringMeta.search();
    LongArrayList ids = new LongArrayList();
    for (int docId = resultsIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
        ids.add((long) docId);
        docId = resultsIterator.nextDoc();
    }
    ids.trimToSize();

    // XXX REMOVE assertAndCorrectIds() ONCE BUG IS FIXED!!!!
    //assertAndCorrectIds(search, ids);
    //Sorting.quickSort(ids.elements(), 0, ids.size(), IdComparator);

    throw new RuntimeException("fix below commented-out line due to refactoring");
    /*Sorting.mergeSort(ids.elements(), 0, ids.size(), RssContent.PubDateComparator.Instance);
    if (Log.jlucene.getLevel() == ariba.util.log.Log.DebugLevel) {
        Log.jlucene.debug("%s Done Fetching Records", Clock.timer().markAndCheckRunning());
    }
    return ids.elements();
    */
}
From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java
License:Apache License
private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException {
    docsEnum = iterator.postings(docsEnum);
    int nextDoc = docsEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    writeFreq(docsEnum.freq());
    nextDoc = docsEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsEnum;
}