Usage examples for org.apache.lucene.search.DocIdSetIterator.nextDoc()
public abstract int nextDoc() throws IOException;
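nextDoc() advances the iterator to the next matching document and returns its doc ID, or DocIdSetIterator.NO_MORE_DOCS once the set is exhausted. Before looking at the examples below, here is a minimal sketch of the standard consumption loop; it wraps a FixedBitSet in a BitSetIterator purely for illustration, and the class name, bit positions, and cost hint are arbitrary assumptions, not taken from any of the projects listed here.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class NextDocLoopSketch {
    public static void main(String[] args) throws IOException {
        // Arbitrary example data: a bit set over 16 docs with three bits set.
        FixedBitSet bits = new FixedBitSet(16);
        bits.set(1);
        bits.set(3);
        bits.set(9);

        // BitSetIterator exposes the bit set as a DocIdSetIterator; the second
        // argument is only a cost hint (3 here, since three bits are set).
        DocIdSetIterator it = new BitSetIterator(bits, 3);

        // The canonical nextDoc() loop: advance until NO_MORE_DOCS is returned,
        // and never call nextDoc() again after that.
        int doc;
        while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.println("matched doc " + doc); // prints 1, 3, 9
        }
    }
}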
From source file: org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java
License: Open Source License

@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}
From source file: org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java
License: Open Source License

@Test
public void shouldCollectOneMatchingDocsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(2, allMatchingDocs.size());

    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());

    matchingDocs = allMatchingDocs.get(1);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}
From source file: org.neo4j.kernel.api.impl.index.DocValuesCollector.java
License: Open Source License

private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator disi = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, disi);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}
From source file: org.neo4j.kernel.api.impl.index.DocValuesCollectorTest.java
License: Open Source License

@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator disi = matchingDocs.docIdSet.iterator();
    assertEquals(1, disi.nextDoc());
    assertEquals(3, disi.nextDoc());
    assertEquals(5, disi.nextDoc());
    assertEquals(9, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}
From source file: org.neo4j.kernel.api.impl.index.DocValuesCollectorTest.java
License: Open Source License

@Test
public void shouldCollectOneMatchingDocsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(2, allMatchingDocs.size());

    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    DocIdSetIterator disi = matchingDocs.docIdSet.iterator();
    assertEquals(1, disi.nextDoc());
    assertEquals(3, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());

    matchingDocs = allMatchingDocs.get(1);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    disi = matchingDocs.docIdSet.iterator();
    assertEquals(5, disi.nextDoc());
    assertEquals(9, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}
From source file: org.opengrok.indexer.search.context.OGKUnifiedHighlighter.java
License: Apache License

/**
 * Produces original text by reading from OpenGrok source content relative
 * to {@link RuntimeEnvironment#getSourceRootPath()} and returns the content
 * for each document if the timestamp matches -- or else just {@code null}
 * for a missing file or a timestamp mismatch (as "the returned Strings must
 * be identical to what was indexed.")
 * <p>
 * "This method must load fields for at least one document from the given
 * {@link DocIdSetIterator} but need not return all of them; by default the
 * character lengths are summed and this method will return early when
 * {@code cacheCharsThreshold} is exceeded. Specifically if that number is
 * 0, then only one document is fetched no matter what. Values in the array
 * of {@link CharSequence} will be {@code null} if no value was found."
 * @return a defined instance
 * @throws IOException if an I/O error occurs
 */
@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
        int cacheCharsThreshold) throws IOException {
    List<CharSequence[]> docListOfFields = new ArrayList<>(
            cacheCharsThreshold == 0 ? 1 : (int) Math.min(64, docIter.cost()));
    int sumChars = 0;
    do {
        int docId = docIter.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        }
        Document doc = searcher.doc(docId);
        String path = doc.get(QueryBuilder.PATH);
        String storedU = doc.get(QueryBuilder.U);
        String content = getRepoFileContent(path, storedU);

        CharSequence[] seqs = new CharSequence[fields.length];
        Arrays.fill(seqs, content);
        docListOfFields.add(seqs);

        if (content != null) {
            sumChars += content.length();
        }
    } while (sumChars <= cacheCharsThreshold && cacheCharsThreshold != 0);

    return docListOfFields;
}
From source file: org.opensextant.solrtexttagger.TaggerRequestHandler.java
License: Open Source License

private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
    // Now we must supply a Solr DocList and add it to the response.
    //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
    //  know exactly what documents to return, the order doesn't matter nor does
    //  scoring.
    //  Ideally an implementation of DocList could be directly implemented off
    //  of a BitSet, but there are way too many methods to implement for a minor
    //  payoff.
    int matchDocs = matchDocIdsBS.cardinality();
    int[] docIds = new int[Math.min(rows, matchDocs)];
    DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
    for (int i = 0; i < docIds.length; i++) {
        docIds[i] = docIdIter.nextDoc();
    }
    return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f);
}
From source file: org.voyanttools.trombone.tool.corpus.DocumentNgrams.java
License: Open Source License

List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);
    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];

        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper,
                luceneDoc, lastToken);

        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null
                    && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }

        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);
        ngrams = filter.getFilteredNgrams(ngrams, lastToken);
        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }
    return queue.getOrderedList(start);
}
From source file: org.zenoss.zep.index.impl.lucene.LuceneEventIndexBackend.java
License: Open Source License

protected void searchEventTagSeverities(EventFilter filter, EventTagSeverityCounter counter)
        throws ZepException {
    final boolean hasTagsFilter = filter.getTagFilterCount() > 0;
    IndexSearcher searcher = null;
    try {
        searcher = getSearcher();
        final Query query = buildQueryFromFilter(searcher.getIndexReader(), filter);
        final OpenBitSet docs = new OpenBitSet(searcher.getIndexReader().maxDoc());
        searcher.search(query, new Collector() {
            private int docBase;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
            }

            @Override
            public void collect(int doc) throws IOException {
                docs.set(docBase + doc);
            }

            @Override
            public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
                this.docBase = atomicReaderContext.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });

        int docId;
        final DocIdSetIterator it = docs.iterator();
        while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            final EventSummary summary;
            if (this.archive) {
                // TODO: This isn't very cheap - would be better to batch by UUID in separate calls
                // This doesn't get called on the event archive right now, so leave it until need to optimize.
                Document doc = searcher.doc(docId, UUID_FIELDS);
                summary = this.eventSummaryBaseDao.findByUuid(doc.get(FIELD_UUID));
            } else {
                Document doc = searcher.doc(docId);
                // this is an optimization for getting the non-archived tags from an organizer for ticket
                // see ZEN-7239. For this ticket we updated the index to store what we needed for generating the
                // tags severities. Since we do not want a migrate of completely deleting the index this
                // method is backwards compatible by uncompressing the protobuf
                if (doc.get(FIELD_SEVERITY) != null) {
                    int count = Integer.parseInt(doc.get(FIELD_COUNT));
                    boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED
                            .equals(EventStatus.valueOf(Integer.parseInt(doc.get(FIELD_STATUS))));
                    EventSeverity severity = EventSeverity.valueOf(Integer.parseInt(doc.get(FIELD_SEVERITY)));
                    // get the map for each filter and update the count
                    for (String tag : doc.getValues(FIELD_TAGS))
                        counter.update(tag, severity, count, acknowledged);
                    continue;
                } else {
                    summary = LuceneEventIndexMapper.toEventSummary(doc);
                }
            }
            boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED == summary.getStatus();
            Event occurrence = summary.getOccurrence(0);
            EventSeverity severity = occurrence.getSeverity();
            int count = occurrence.getCount();
            EventActor actor = occurrence.getActor();

            // Build tags from element_uuids - no tags specified in filter
            if (!hasTagsFilter) {
                if (actor.hasElementUuid())
                    counter.update(actor.getElementUuid(), severity, count, acknowledged);
            }
            // Build tag severities from passed in filter
            else {
                for (String uuid : Arrays.asList(actor.getElementUuid(), actor.getElementSubUuid()))
                    counter.update(uuid, severity, count, acknowledged);
                for (EventTag tag : occurrence.getTagsList())
                    for (String tagUuid : tag.getUuidList())
                        counter.update(tagUuid, severity, count, acknowledged);
            }
        }
    } catch (IOException e) {
        throw new ZepException(e);
    } catch (OutOfMemoryError e) {
        closeSearcherManager();
        throw e;
    } finally {
        returnSearcher(searcher);
    }
}
From source file: perf.DiskUsage.java
License: Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();

    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }

        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }
    return new TreeSet<FieldStats>(stats.values());
}