List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc
public abstract int nextDoc() throws IOException;
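Before the concrete examples below, here is a minimal sketch of the iteration idiom they all share: keep calling nextDoc() until it returns DocIdSetIterator.NO_MORE_DOCS. The class name NextDocSketch, the helper consumeAllDocs, and the println body are illustrative placeholders, not part of any of the source files listed here.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

public class NextDocSketch {

    // Exhaust an iterator by calling nextDoc() until NO_MORE_DOCS is returned.
    // A DocIdSetIterator starts positioned before the first document, so the
    // first nextDoc() call advances it to the first match.
    static void consumeAllDocs(DocIdSetIterator iterator) throws IOException {
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            System.out.println("matched doc " + doc); // replace with real per-document work
        }
    }
}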
From source file:org.archive.porky.FindAndIntersectionsUsingPForDeltaDocIdSetUDF.java
License:Apache License
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0)
        return null;
    DataBag bagOfBags = (DataBag) input.get(0);
    DocSet pForDeltaDocSet = null;
    ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
    try {
        for (Tuple t : bagOfBags) {
            DataBag bag = (DataBag) t.get(0);
            pForDeltaDocSet = DocSetFactory.getPForDeltaDocSetInstance();
            for (Tuple tup : bag) {
                if (tup != null && tup.size() == 1) {
                    pForDeltaDocSet.addDoc((Integer) tup.get(0));
                }
            }
            docs.add(pForDeltaDocSet);
        }
        ArrayList<Integer> intersectedIds = new ArrayList<Integer>();
        AndDocIdSet andSet = new AndDocIdSet(docs);
        DocIdSetIterator iter = andSet.iterator();
        int docId = iter.nextDoc();
        while (docId != DocIdSetIterator.NO_MORE_DOCS) {
            intersectedIds.add(docId);
            docId = iter.nextDoc();
        }
        // return bag of intersected IDs
        DataBag resultBag = new NonSpillableDataBag(intersectedIds.size());
        for (int Id : intersectedIds) {
            Tuple newTuple = TupleFactory.getInstance().newTuple(1);
            newTuple.set(0, new Integer(Id));
            resultBag.add(newTuple);
        }
        return resultBag;
    } catch (Exception e) {
        throw WrappedIOException.wrap("Caught exception processing input row ", e);
    }
}
From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java
License:Apache License
/**
 * Check whether there is one or more documents matching the provided query.
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // the scorer API should be more efficient at stopping after the first
    // match than the bulk scorer API
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(context);
        if (scorer == null) {
            continue;
        }
        final Bits liveDocs = context.reader().getLiveDocs();
        final DocIdSetIterator iterator = scorer.iterator();
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            if (liveDocs == null || liveDocs.get(doc)) {
                return true;
            }
        }
    }
    return false;
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.nested.NestedAggregator.java
License:Apache License
@Override
public LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(ctx);
    IndexSearcher searcher = new IndexSearcher(topLevelContext);
    searcher.setQueryCache(null);
    Weight weight = searcher.createNormalizedWeight(childFilter, false);
    Scorer childDocsScorer = weight.scorer(ctx);
    final BitSet parentDocs = parentFilter.getBitSet(ctx);
    final DocIdSetIterator childDocs = childDocsScorer != null ? childDocsScorer.iterator() : null;
    return new LeafBucketCollectorBase(sub, null) {
        @Override
        public void collect(int parentDoc, long bucket) throws IOException {
            // if parentDoc is 0 then this parent doesn't have child docs (child docs
            // always appear before their parent doc), so we can skip:
            if (parentDoc == 0 || parentDocs == null || childDocs == null) {
                return;
            }
            final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
            int childDocId = childDocs.docID();
            if (childDocId <= prevParentDoc) {
                childDocId = childDocs.advance(prevParentDoc + 1);
            }
            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                collectBucket(sub, childDocId, bucket);
            }
        }
    };
}
From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java
License:Apache License
@Override
public DocIdSetIterator iterator() {
    final DocIdSetIterator in = scorer.iterator();
    return new DocIdSetIterator() {
        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return in.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return in.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int docID() {
            return in.docID();
        }

        @Override
        public long cost() {
            return in.cost();
        }
    };
}
From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java
License:Apache License
@Override
public TwoPhaseIterator twoPhaseIterator() {
    final TwoPhaseIterator in = scorer.twoPhaseIterator();
    if (in == null) {
        return null;
    }
    final DocIdSetIterator inApproximation = in.approximation();
    final DocIdSetIterator approximation = new DocIdSetIterator() {
        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return inApproximation.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return inApproximation.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int docID() {
            return inApproximation.docID();
        }

        @Override
        public long cost() {
            return inApproximation.cost();
        }
    };
    return new TwoPhaseIterator(approximation) {
        @Override
        public boolean matches() throws IOException {
            profile.startTime(QueryTimingType.MATCH);
            try {
                return in.matches();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public float matchCost() {
            return in.matchCost();
        }
    };
}
From source file:org.cosmo.common.model.PublicFolder.java
License:Apache License
public static long[] getAllItemRecordIds(org.cosmo.common.xml.XML rootFolder) {
    try {
        // track duplicates - faster than HashSet
        OpenBitSet recordIdSet = getAllItemRecordIdSet(rootFolder);
        LongArrayList recordIds = new LongArrayList((int) recordIdSet.cardinality());
        DocIdSetIterator recordIdIterator = recordIdSet.iterator();
        for (int docId = recordIdIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
            recordIds.add((long) docId);
            docId = recordIdIterator.nextDoc();
        }
        return recordIds.elements();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
From source file:org.cosmo.common.record.SearchResult.java
License:Apache License
public long[] records(boolean load) throws Exception {
    if (Log.jlucene.getLevel() == java.util.logging.Level.FINE) {
        Log.jlucene.fine(Fmt.S("%s Start Fetching Records", Clock.timer().markAndCheckRunning()));
    }
    DocIdSetIterator resultsIterator = _result.iterator();
    Search search = _context._searchField._declaringMeta.search();
    LongArrayList ids = new LongArrayList();
    for (int docId = resultsIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
        ids.add((long) docId);
        docId = resultsIterator.nextDoc();
    }
    ids.trimToSize();

    // XXX REMOVE assertAndCorrectIds() ONCE BUG IS FIXED!!!!
    //assertAndCorrectIds(search, ids);
    //Sorting.quickSort(ids.elements(), 0, ids.size(), IdComparator);

    throw new RuntimeException("fix below commented out line due to refactoring");
    /*Sorting.mergeSort(ids.elements(), 0, ids.size(), RssContent.PubDateComparator.Instance);
    if (Log.jlucene.getLevel() == ariba.util.log.Log.DebugLevel) {
        Log.jlucene.debug("%s Done Fetching Records", Clock.timer().markAndCheckRunning());
    }
    return ids.elements();
    */
}
From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java
License:Apache License
/**
 * Converts to a cacheable {@link DocIdSet}.
 * <p/>
 * Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
 * might be expensive even if it's cacheable (i.e. not going back to the reader to execute). We effectively
 * always either return an empty {@link DocIdSet} or a {@link FixedBitSet}, but never <code>null</code>.
 */
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
    if (set == null || set == EMPTY_DOCIDSET) {
        return EMPTY_DOCIDSET;
    }
    DocIdSetIterator it = set.iterator();
    if (it == null) {
        return EMPTY_DOCIDSET;
    }
    int doc = it.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return EMPTY_DOCIDSET;
    }
    if (set instanceof FixedBitSet) {
        return set;
    }
    // TODO: should we use WAH8DocIdSet like Lucene?
    FixedBitSet fixedBitSet = new FixedBitSet(reader.maxDoc());
    do {
        fixedBitSet.set(doc);
        doc = it.nextDoc();
    } while (doc != DocIdSetIterator.NO_MORE_DOCS);
    return fixedBitSet;
}
From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java
License:Apache License
/**
 * Creates a {@link FixedBitSet} from an iterator.
 */
public static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
    FixedBitSet set = new FixedBitSet(numBits);
    int doc;
    while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        set.set(doc);
    }
    return set;
}
From source file:org.elasticsearch.common.lucene.docset.SlicedOpenBitSetTests.java
License:Apache License
@Test
public void simpleTests() throws IOException {
    int numberOfBits = 500;
    SlicedOpenBitSet bitSet = new SlicedOpenBitSet(new long[OpenBitSet.bits2words(numberOfBits) + 100],
            OpenBitSet.bits2words(numberOfBits), 100);
    bitSet.fastSet(100);
    assertThat(bitSet.fastGet(100), equalTo(true));

    DocIdSetIterator iterator = bitSet.iterator();
    assertThat(iterator.nextDoc(), equalTo(100));
    assertThat(iterator.nextDoc(), equalTo(DocIdSetIterator.NO_MORE_DOCS));
}