Example usage for org.apache.lucene.search DocIdSetIterator nextDoc

List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc

Introduction

In this page you can find the example usage for org.apache.lucene.search DocIdSetIterator nextDoc.

Prototype

public abstract int nextDoc() throws IOException;

Source Link

Document

Advances to the next document in the set and returns the doc it is currently on, or DocIdSetIterator.NO_MORE_DOCS if there are no more docs in the set.
NOTE: after the iterator has been exhausted you should not call this method, as it may result in unpredictable behavior.

Usage

From source file:org.archive.porky.FindAndIntersectionsUsingPForDeltaDocIdSetUDF.java

License:Apache License

/**
 * Pig UDF: intersects several bags of integer document ids.
 *
 * <p>The single input field is a bag of bags; each inner bag holds 1-field tuples
 * of integer doc ids. Each inner bag is loaded into a PForDelta-compressed doc set,
 * the sets are AND-ed together, and the intersection is returned as a bag of
 * 1-field tuples. Returns {@code null} for a null or empty input tuple.
 *
 * @param input tuple whose field 0 is a {@code DataBag} of {@code DataBag}s of id tuples
 * @return bag of tuples containing the ids present in every inner bag
 * @throws IOException if processing fails (wraps any underlying exception)
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    DataBag bagOfBags = (DataBag) input.get(0);
    try {
        // Build one compressed doc set per inner bag.
        ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
        for (Tuple t : bagOfBags) {
            DataBag bag = (DataBag) t.get(0);
            DocSet pForDeltaDocSet = DocSetFactory.getPForDeltaDocSetInstance();
            for (Tuple tup : bag) {
                // Skip malformed tuples; only 1-field tuples carry a doc id.
                if (tup != null && tup.size() == 1) {
                    pForDeltaDocSet.addDoc((Integer) tup.get(0));
                }
            }
            docs.add(pForDeltaDocSet);
        }

        // Walk the logical AND of all sets to collect the intersection.
        ArrayList<Integer> intersectedIds = new ArrayList<Integer>();
        AndDocIdSet andSet = new AndDocIdSet(docs);
        DocIdSetIterator iter = andSet.iterator();
        for (int docId = iter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iter.nextDoc()) {
            intersectedIds.add(docId);
        }

        // Return a bag of the intersected ids.
        DataBag resultBag = new NonSpillableDataBag(intersectedIds.size());
        for (Integer id : intersectedIds) {
            Tuple newTuple = TupleFactory.getInstance().newTuple(1);
            // id is already boxed; avoids deprecated new Integer(...).
            newTuple.set(0, id);
            resultBag.add(newTuple);
        }
        return resultBag;
    } catch (Exception e) {
        throw WrappedIOException.wrap("Caught exception processing input row ", e);
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java

License:Apache License

/**
 * Check whether there is one or more documents matching the provided query.
 *
 * @param searcher searcher to run the query against
 * @param query    query to test for matches
 * @return {@code true} as soon as one live (non-deleted) matching doc is found
 * @throws IOException on index access failure
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // the scorer API should be more efficient at stopping after the first
    // match than the bulk scorer API
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(context);
        if (scorer == null) {
            // no possible matches in this segment
            continue;
        }
        // liveDocs is null when the segment has no deletions.
        final Bits liveDocs = context.reader().getLiveDocs();
        final DocIdSetIterator iterator = scorer.iterator();
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            if (liveDocs == null || liveDocs.get(doc)) {
                return true;
            }
        }
    }
    return false;
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.nested.NestedAggregator.java

License:Apache License

@Override
public LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    // Build a searcher over the whole index so childFilter can be resolved into
    // a scorer for this segment; query caching is disabled for this ad-hoc searcher.
    IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(ctx);
    IndexSearcher searcher = new IndexSearcher(topLevelContext);
    searcher.setQueryCache(null);
    Weight weight = searcher.createNormalizedWeight(childFilter, false);
    Scorer childDocsScorer = weight.scorer(ctx);

    final BitSet parentDocs = parentFilter.getBitSet(ctx);
    // childDocs is null when this segment contains no child docs at all.
    final DocIdSetIterator childDocs = childDocsScorer != null ? childDocsScorer.iterator() : null;
    return new LeafBucketCollectorBase(sub, null) {
        @Override
        public void collect(int parentDoc, long bucket) throws IOException {
            // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent
            // doc), so we can skip:
            if (parentDoc == 0 || parentDocs == null || childDocs == null) {
                return;
            }

            // Children of parentDoc lie strictly between the previous parent doc id
            // and parentDoc (nested child docs are indexed before their parent).
            final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
            int childDocId = childDocs.docID();
            // Only advance when the iterator is still behind this parent's range,
            // so progress made for earlier parents is reused.
            if (childDocId <= prevParentDoc) {
                childDocId = childDocs.advance(prevParentDoc + 1);
            }

            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                collectBucket(sub, childDocId, bucket);
            }
        }
    };
}

From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java

License:Apache License

@Override
public DocIdSetIterator iterator() {
    final DocIdSetIterator delegate = scorer.iterator();
    // Wrap the underlying iterator so that every advance()/nextDoc() call is
    // timed and recorded against the query profile; docID() and cost() are
    // cheap accessors and are forwarded untimed.
    return new DocIdSetIterator() {

        @Override
        public int docID() {
            return delegate.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return delegate.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return delegate.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public long cost() {
            return delegate.cost();
        }
    };
}

From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java

License:Apache License

@Override
public TwoPhaseIterator twoPhaseIterator() {
    final TwoPhaseIterator delegate = scorer.twoPhaseIterator();
    if (delegate == null) {
        // The wrapped scorer has no two-phase view; nothing to profile.
        return null;
    }
    // First wrap the approximation so its iteration calls are timed.
    final DocIdSetIterator innerApproximation = delegate.approximation();
    final DocIdSetIterator timedApproximation = new DocIdSetIterator() {

        @Override
        public int docID() {
            return innerApproximation.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return innerApproximation.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return innerApproximation.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public long cost() {
            return innerApproximation.cost();
        }
    };
    // Then wrap the confirmation phase so matches() is timed as MATCH.
    return new TwoPhaseIterator(timedApproximation) {
        @Override
        public boolean matches() throws IOException {
            profile.startTime(QueryTimingType.MATCH);
            try {
                return delegate.matches();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public float matchCost() {
            return delegate.matchCost();
        }
    };
}

From source file:org.cosmo.common.model.PublicFolder.java

License:Apache License

/**
 * Collects every record id stored in the folder tree's bit set into a long array.
 *
 * @param rootFolder root of the folder tree to gather record ids from
 * @return backing array of the collected ids (may be longer than the id count,
 *         per {@code LongArrayList.elements()} semantics — TODO confirm)
 * @throws RuntimeException wrapping any {@link IOException} from iteration
 */
public static long[] getAllItemRecordIds(org.cosmo.common.xml.XML rootFolder) {
    try {
        // OpenBitSet tracks duplicates - faster than HashSet
        OpenBitSet recordIdSet = getAllItemRecordIdSet(rootFolder);
        LongArrayList recordIds = new LongArrayList((int) recordIdSet.cardinality());
        DocIdSetIterator recordIdIterator = recordIdSet.iterator();
        // Idiomatic drain loop: the original advanced the iterator inside the
        // body and left the for-update clause empty.
        for (int docId = recordIdIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = recordIdIterator.nextDoc()) {
            recordIds.add((long) docId);
        }
        return recordIds.elements();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:org.cosmo.common.record.SearchResult.java

License:Apache License

/**
 * Collects the doc ids from this search result's iterator into a long array.
 *
 * <p>NOTE(review): this method currently ALWAYS throws after collecting the ids —
 * the sort step below was broken by a refactoring and commented out, and an
 * unconditional RuntimeException was left in its place. The {@code load}
 * parameter is never read here — presumably consumed by the commented-out
 * path; verify before removing.
 */
public long[] records(boolean load) throws Exception {
    if (Log.jlucene.getLevel() == java.util.logging.Level.FINE) {
        Log.jlucene.fine(Fmt.S("%s Start Fetching Records", Clock.timer().markAndCheckRunning()));
    }

    DocIdSetIterator resultsIterator = _result.iterator();
    Search search = _context._searchField._declaringMeta.search();

    LongArrayList ids = new LongArrayList();

    // Drain the iterator; nextDoc() returns NO_MORE_DOCS once exhausted.
    for (int docId = resultsIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
        ids.add((long) docId);
        docId = resultsIterator.nextDoc();
    }

    ids.trimToSize();
    // XXX REMOVE assertAndCorrectIds() ONCE BUG IS FIXED!!!!
    //assertAndCorrectIds(search, ids);
    //Sorting.quickSort(ids.elements(), 0, ids.size(), IdComparator);

    // Deliberate failure until the refactoring-broken sort below is restored.
    throw new RuntimeException("fix below commenouted out line due to refactoring");
    /*Sorting.mergeSort(ids.elements(), 0, ids.size(), RssContent.PubDateComparator.Instance);
            
            
            
            
    if (Log.jlucene.getLevel() == ariba.util.log.Log.DebugLevel) {
      Log.jlucene.debug("%s Done Fetching Records", Clock.timer().markAndCheckRunning());
    }
    return ids.elements();
    */
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java

License:Apache License

/**
 * Converts to a cacheable {@link DocIdSet}.
 * <p/>
 * Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
 * might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
 * always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
 */
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
    // Null or already-empty sets collapse to the shared empty instance.
    if (set == null || set == EMPTY_DOCIDSET) {
        return EMPTY_DOCIDSET;
    }
    DocIdSetIterator it = set.iterator();
    if (it == null) {
        return EMPTY_DOCIDSET;
    }
    // Pull the first doc: an immediately-exhausted iterator means an empty set.
    int doc = it.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return EMPTY_DOCIDSET;
    }
    // Non-empty FixedBitSet is already cacheable as-is.
    if (set instanceof FixedBitSet) {
        return set;
    }
    // TODO: should we use WAH8DocIdSet like Lucene?
    // Materialize the remaining docs into a fresh bit set (first doc included).
    FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
        bits.set(doc);
        doc = it.nextDoc();
    }
    return bits;
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java

License:Apache License

/**
 * Creates a {@link FixedBitSet} from an iterator.
 *
 * @param iterator source of doc ids; consumed to exhaustion
 * @param numBits  capacity of the resulting bit set
 * @return a bit set with one bit set per doc id produced by the iterator
 * @throws IOException if the iterator fails while advancing
 */
public static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
    FixedBitSet bits = new FixedBitSet(numBits);
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        bits.set(doc);
    }
    return bits;
}

From source file:org.elasticsearch.common.lucene.docset.SlicedOpenBitSetTests.java

License:Apache License

@Test
public void simpleTests() throws IOException {
    // NOTE(review): ctor args look like (backing words, slice word length, offset)
    // — extra 100 words beyond the slice; confirm against SlicedOpenBitSet.
    int numberOfBits = 500;
    SlicedOpenBitSet bitSet = new SlicedOpenBitSet(new long[OpenBitSet.bits2words(numberOfBits) + 100],
            OpenBitSet.bits2words(numberOfBits), 100);

    bitSet.fastSet(100);
    assertThat(bitSet.fastGet(100), equalTo(true));

    // The iterator should yield exactly the single set bit, then report exhaustion.
    DocIdSetIterator iterator = bitSet.iterator();
    assertThat(iterator.nextDoc(), equalTo(100));
    assertThat(iterator.nextDoc(), equalTo(DocIdSetIterator.NO_MORE_DOCS));
}