Example usage for org.apache.lucene.search DocIdSetIterator nextDoc

List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc

Introduction

In this page you can find the example usage for org.apache.lucene.search DocIdSetIterator nextDoc.

Prototype

public abstract int nextDoc() throws IOException;

Source Link

Document

Advances to the next document in the set and returns the doc it is currently on, or DocIdSetIterator.NO_MORE_DOCS if there are no more docs in the set.
NOTE: after the iterator has been exhausted you should not call this method, as it may result in unpredictable behavior.

Usage

From source file:org.archive.porky.FindAndIntersectionsUsingPForDeltaDocIdSetUDF.java

License:Apache License

/**
 * Pig UDF: intersects several bags of integer document ids.
 *
 * <p>The single input field is a bag of bags; each inner bag holds 1-field tuples
 * of integer doc ids. Each inner bag is loaded into a PForDelta-compressed doc set,
 * the sets are AND-ed together, and the intersection is returned as a bag of
 * 1-field tuples. Returns {@code null} for a null or empty input tuple.
 *
 * @param input tuple whose field 0 is a {@code DataBag} of {@code DataBag}s of id tuples
 * @return bag of tuples containing the ids present in every inner bag
 * @throws IOException if processing fails (wraps any underlying exception)
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    DataBag bagOfBags = (DataBag) input.get(0);
    try {
        // Build one compressed doc set per inner bag.
        ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
        for (Tuple t : bagOfBags) {
            DataBag bag = (DataBag) t.get(0);
            DocSet pForDeltaDocSet = DocSetFactory.getPForDeltaDocSetInstance();
            for (Tuple tup : bag) {
                // Skip malformed tuples; only 1-field tuples carry a doc id.
                if (tup != null && tup.size() == 1) {
                    pForDeltaDocSet.addDoc((Integer) tup.get(0));
                }
            }
            docs.add(pForDeltaDocSet);
        }

        // Walk the logical AND of all sets to collect the intersection.
        ArrayList<Integer> intersectedIds = new ArrayList<Integer>();
        AndDocIdSet andSet = new AndDocIdSet(docs);
        DocIdSetIterator iter = andSet.iterator();
        for (int docId = iter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iter.nextDoc()) {
            intersectedIds.add(docId);
        }

        // Return a bag of the intersected ids.
        DataBag resultBag = new NonSpillableDataBag(intersectedIds.size());
        for (Integer id : intersectedIds) {
            Tuple newTuple = TupleFactory.getInstance().newTuple(1);
            // id is already boxed; avoids deprecated new Integer(...).
            newTuple.set(0, id);
            resultBag.add(newTuple);
        }
        return resultBag;
    } catch (Exception e) {
        throw WrappedIOException.wrap("Caught exception processing input row ", e);
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java

License:Apache License

/**
 * Check whether there is one or more documents matching the provided query.
 *
 * @param searcher searcher to run the query against
 * @param query    query to test for matches
 * @return {@code true} as soon as one live (non-deleted) matching doc is found
 * @throws IOException on index access failure
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // the scorer API should be more efficient at stopping after the first
    // match than the bulk scorer API
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(context);
        if (scorer == null) {
            // no possible matches in this segment
            continue;
        }
        // liveDocs is null when the segment has no deletions.
        final Bits liveDocs = context.reader().getLiveDocs();
        final DocIdSetIterator iterator = scorer.iterator();
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            if (liveDocs == null || liveDocs.get(doc)) {
                return true;
            }
        }
    }
    return false;
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.nested.NestedAggregator.java

License:Apache License

@Override
public LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    // Build a searcher over the whole index so childFilter can be resolved into
    // a scorer for this segment; query caching is disabled for this ad-hoc searcher.
    IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(ctx);
    IndexSearcher searcher = new IndexSearcher(topLevelContext);
    searcher.setQueryCache(null);
    Weight weight = searcher.createNormalizedWeight(childFilter, false);
    Scorer childDocsScorer = weight.scorer(ctx);

    final BitSet parentDocs = parentFilter.getBitSet(ctx);
    // childDocs is null when this segment contains no child docs at all.
    final DocIdSetIterator childDocs = childDocsScorer != null ? childDocsScorer.iterator() : null;
    return new LeafBucketCollectorBase(sub, null) {
        @Override
        public void collect(int parentDoc, long bucket) throws IOException {
            // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent
            // doc), so we can skip:
            if (parentDoc == 0 || parentDocs == null || childDocs == null) {
                return;
            }

            // Children of parentDoc lie strictly between the previous parent doc id
            // and parentDoc (nested child docs are indexed before their parent).
            final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
            int childDocId = childDocs.docID();
            // Only advance when the iterator is still behind this parent's range,
            // so progress made for earlier parents is reused.
            if (childDocId <= prevParentDoc) {
                childDocId = childDocs.advance(prevParentDoc + 1);
            }

            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                collectBucket(sub, childDocId, bucket);
            }
        }
    };
}

From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java

License:Apache License

@Override
public DocIdSetIterator iterator() {
    final DocIdSetIterator delegate = scorer.iterator();
    // Wrap the underlying iterator so that every advance()/nextDoc() call is
    // timed and recorded against the query profile; docID() and cost() are
    // cheap accessors and are forwarded untimed.
    return new DocIdSetIterator() {

        @Override
        public int docID() {
            return delegate.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return delegate.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return delegate.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public long cost() {
            return delegate.cost();
        }
    };
}

From source file:org.codelibs.elasticsearch.search.profile.query.ProfileScorer.java

License:Apache License

@Override
public TwoPhaseIterator twoPhaseIterator() {
    final TwoPhaseIterator delegate = scorer.twoPhaseIterator();
    if (delegate == null) {
        // The wrapped scorer has no two-phase view; nothing to profile.
        return null;
    }
    // First wrap the approximation so its iteration calls are timed.
    final DocIdSetIterator innerApproximation = delegate.approximation();
    final DocIdSetIterator timedApproximation = new DocIdSetIterator() {

        @Override
        public int docID() {
            return innerApproximation.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(QueryTimingType.NEXT_DOC);
            try {
                return innerApproximation.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int advance(int target) throws IOException {
            profile.startTime(QueryTimingType.ADVANCE);
            try {
                return innerApproximation.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public long cost() {
            return innerApproximation.cost();
        }
    };
    // Then wrap the confirmation phase so matches() is timed as MATCH.
    return new TwoPhaseIterator(timedApproximation) {
        @Override
        public boolean matches() throws IOException {
            profile.startTime(QueryTimingType.MATCH);
            try {
                return delegate.matches();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public float matchCost() {
            return delegate.matchCost();
        }
    };
}

From source file:org.cosmo.common.model.PublicFolder.java

License:Apache License

/**
 * Collects every record id stored in the folder tree's bit set into a long array.
 *
 * @param rootFolder root of the folder tree to gather record ids from
 * @return backing array of the collected ids (may be longer than the id count,
 *         per {@code LongArrayList.elements()} semantics — TODO confirm)
 * @throws RuntimeException wrapping any {@link IOException} from iteration
 */
public static long[] getAllItemRecordIds(org.cosmo.common.xml.XML rootFolder) {
    try {
        // OpenBitSet tracks duplicates - faster than HashSet
        OpenBitSet recordIdSet = getAllItemRecordIdSet(rootFolder);
        LongArrayList recordIds = new LongArrayList((int) recordIdSet.cardinality());
        DocIdSetIterator recordIdIterator = recordIdSet.iterator();
        // Idiomatic drain loop: the original advanced the iterator inside the
        // body and left the for-update clause empty.
        for (int docId = recordIdIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = recordIdIterator.nextDoc()) {
            recordIds.add((long) docId);
        }
        return recordIds.elements();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:org.cosmo.common.record.SearchResult.java

License:Apache License

/**
 * Collects the doc ids from this search result's iterator into a long array.
 *
 * <p>NOTE(review): this method currently ALWAYS throws after collecting the ids —
 * the sort step below was broken by a refactoring and commented out, and an
 * unconditional RuntimeException was left in its place. The {@code load}
 * parameter is never read here — presumably consumed by the commented-out
 * path; verify before removing.
 */
public long[] records(boolean load) throws Exception {
    if (Log.jlucene.getLevel() == java.util.logging.Level.FINE) {
        Log.jlucene.fine(Fmt.S("%s Start Fetching Records", Clock.timer().markAndCheckRunning()));
    }

    DocIdSetIterator resultsIterator = _result.iterator();
    Search search = _context._searchField._declaringMeta.search();

    LongArrayList ids = new LongArrayList();

    // Drain the iterator; nextDoc() returns NO_MORE_DOCS once exhausted.
    for (int docId = resultsIterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS;) {
        ids.add((long) docId);
        docId = resultsIterator.nextDoc();
    }

    ids.trimToSize();
    // XXX REMOVE assertAndCorrectIds() ONCE BUG IS FIXED!!!!
    //assertAndCorrectIds(search, ids);
    //Sorting.quickSort(ids.elements(), 0, ids.size(), IdComparator);

    // Deliberate failure until the refactoring-broken sort below is restored.
    throw new RuntimeException("fix below commenouted out line due to refactoring");
    /*Sorting.mergeSort(ids.elements(), 0, ids.size(), RssContent.PubDateComparator.Instance);
            
            
            
            
    if (Log.jlucene.getLevel() == ariba.util.log.Log.DebugLevel) {
      Log.jlucene.debug("%s Done Fetching Records", Clock.timer().markAndCheckRunning());
    }
    return ids.elements();
    */
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java

License:Apache License

/**
 * Converts to a cacheable {@link DocIdSet}.
 * <p/>
 * Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
 * might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
 * always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
 */
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
    // Null or already-empty sets collapse to the shared empty instance.
    if (set == null || set == EMPTY_DOCIDSET) {
        return EMPTY_DOCIDSET;
    }
    DocIdSetIterator it = set.iterator();
    if (it == null) {
        return EMPTY_DOCIDSET;
    }
    // Pull the first doc: an immediately-exhausted iterator means an empty set.
    int doc = it.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return EMPTY_DOCIDSET;
    }
    // Non-empty FixedBitSet is already cacheable as-is.
    if (set instanceof FixedBitSet) {
        return set;
    }
    // TODO: should we use WAH8DocIdSet like Lucene?
    // Materialize the remaining docs into a fresh bit set (first doc included).
    FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
        bits.set(doc);
        doc = it.nextDoc();
    }
    return bits;
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSets.java

License:Apache License

/**
 * Creates a {@link FixedBitSet} from an iterator.
 *
 * @param iterator source of doc ids; consumed to exhaustion
 * @param numBits  capacity of the resulting bit set
 * @return a bit set with one bit set per doc id produced by the iterator
 * @throws IOException if the iterator fails while advancing
 */
public static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
    FixedBitSet bits = new FixedBitSet(numBits);
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        bits.set(doc);
    }
    return bits;
}

From source file:org.elasticsearch.common.lucene.docset.SlicedOpenBitSetTests.java

License:Apache License

@Test
public void simpleTests() throws IOException {
    // NOTE(review): ctor args look like (backing words, slice word length, offset)
    // — extra 100 words beyond the slice; confirm against SlicedOpenBitSet.
    int numberOfBits = 500;
    SlicedOpenBitSet bitSet = new SlicedOpenBitSet(new long[OpenBitSet.bits2words(numberOfBits) + 100],
            OpenBitSet.bits2words(numberOfBits), 100);

    bitSet.fastSet(100);
    assertThat(bitSet.fastGet(100), equalTo(true));

    // The iterator should yield exactly the single set bit, then report exhaustion.
    DocIdSetIterator iterator = bitSet.iterator();
    assertThat(iterator.nextDoc(), equalTo(100));
    assertThat(iterator.nextDoc(), equalTo(DocIdSetIterator.NO_MORE_DOCS));
}