Example usage for org.apache.lucene.search DocIdSetIterator nextDoc

List of usage examples for org.apache.lucene.search DocIdSetIterator nextDoc

Introduction

On this page you can find example usage for org.apache.lucene.search DocIdSetIterator nextDoc.

Prototype

public abstract int nextDoc() throws IOException;

Document

Advances to the next document in the set and returns the doc it is currently on, or NO_MORE_DOCS if there are no more docs in the set.
NOTE: after the iterator has been exhausted you should not call this method, as it may result in unpredictable behavior.
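
Most of the examples below follow the same consumption pattern: call nextDoc() in a loop until it returns NO_MORE_DOCS, and never call it again after that. A minimal sketch of that idiom, where the docIdSet variable and the process() call are placeholders for illustration rather than part of the Lucene API:

DocIdSetIterator iterator = docIdSet.iterator(); // docIdSet: any org.apache.lucene.search.DocIdSet
int doc;
while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    process(doc); // placeholder for per-document work; doc is the current document id
}
// do not call iterator.nextDoc() again once NO_MORE_DOCS has been returned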

Usage

From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java

License:Open Source License

@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}

From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollectorTest.java

License:Open Source License

@Test
public void shouldCollectOneMatchingDocsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(2, allMatchingDocs.size());

    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    DocIdSetIterator idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(1, idIterator.nextDoc());
    assertEquals(3, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());

    matchingDocs = allMatchingDocs.get(1);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    idIterator = matchingDocs.docIdSet.iterator();
    assertEquals(5, idIterator.nextDoc());
    assertEquals(9, idIterator.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, idIterator.nextDoc());
}

From source file:org.neo4j.kernel.api.impl.index.DocValuesCollector.java

License:Open Source License

private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator disi = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, disi);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}

From source file:org.neo4j.kernel.api.impl.index.DocValuesCollectorTest.java

License:Open Source License

@Test
public void shouldCollectAllHitsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(1, allMatchingDocs.size());
    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(4, matchingDocs.totalHits);
    DocIdSetIterator disi = matchingDocs.docIdSet.iterator();
    assertEquals(1, disi.nextDoc());
    assertEquals(3, disi.nextDoc());
    assertEquals(5, disi.nextDoc());
    assertEquals(9, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}

From source file:org.neo4j.kernel.api.impl.index.DocValuesCollectorTest.java

License:Open Source License

@Test
public void shouldCollectOneMatchingDocsPerSegment() throws Exception {
    // given
    DocValuesCollector collector = new DocValuesCollector();
    IndexReaderStub readerStub = indexReaderWithMaxDocs(42);

    // when
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(1);
    collector.collect(3);
    collector.doSetNextReader(readerStub.getContext());
    collector.collect(5);
    collector.collect(9);

    // then
    assertEquals(4, collector.getTotalHits());
    List<DocValuesCollector.MatchingDocs> allMatchingDocs = collector.getMatchingDocs();
    assertEquals(2, allMatchingDocs.size());

    DocValuesCollector.MatchingDocs matchingDocs = allMatchingDocs.get(0);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    DocIdSetIterator disi = matchingDocs.docIdSet.iterator();
    assertEquals(1, disi.nextDoc());
    assertEquals(3, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());

    matchingDocs = allMatchingDocs.get(1);
    assertSame(readerStub.getContext(), matchingDocs.context);
    assertEquals(2, matchingDocs.totalHits);
    disi = matchingDocs.docIdSet.iterator();
    assertEquals(5, disi.nextDoc());
    assertEquals(9, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
}

From source file:org.opengrok.indexer.search.context.OGKUnifiedHighlighter.java

License:Apache License

/**
 * Produces original text by reading from OpenGrok source content relative
 * to {@link RuntimeEnvironment#getSourceRootPath()} and returns the content
 * for each document if the timestamp matches -- or else just {@code null}
 * for a missing file or a timestamp mismatch (as "the returned Strings must
 * be identical to what was indexed.")
 * <p>
 * "This method must load fields for at least one document from the given
 * {@link DocIdSetIterator} but need not return all of them; by default the
 * character lengths are summed and this method will return early when
 * {@code cacheCharsThreshold} is exceeded. Specifically if that number is
 * 0, then only one document is fetched no matter what. Values in the array
 * of {@link CharSequence} will be {@code null} if no value was found."
 * @return a defined instance
 * @throws IOException if an I/O error occurs
 */
@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
        int cacheCharsThreshold) throws IOException {

    List<CharSequence[]> docListOfFields = new ArrayList<>(
            cacheCharsThreshold == 0 ? 1 : (int) Math.min(64, docIter.cost()));

    int sumChars = 0;
    do {
        int docId = docIter.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        }
        Document doc = searcher.doc(docId);

        String path = doc.get(QueryBuilder.PATH);
        String storedU = doc.get(QueryBuilder.U);
        String content = getRepoFileContent(path, storedU);

        CharSequence[] seqs = new CharSequence[fields.length];
        Arrays.fill(seqs, content);
        docListOfFields.add(seqs);

        if (content != null) {
            sumChars += content.length();
        }
    } while (sumChars <= cacheCharsThreshold && cacheCharsThreshold != 0);

    return docListOfFields;
}

From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java

License:Open Source License

private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
    //Now we must supply a Solr DocList and add it to the response.
    //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
    //  know exactly what documents to return, the order doesn't matter nor does
    //  scoring.
    //  Ideally an implementation of DocList could be directly implemented off
    //  of a BitSet, but there are way too many methods to implement for a minor
    //  payoff.
    int matchDocs = matchDocIdsBS.cardinality();
    int[] docIds = new int[Math.min(rows, matchDocs)];
    DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
    for (int i = 0; i < docIds.length; i++) {
        docIds[i] = docIdIter.nextDoc();
    }
    return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f);
}

From source file:org.voyanttools.trombone.tool.corpus.DocumentNgrams.java

License:Open Source License

List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);

    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];

        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper,
                luceneDoc, lastToken);

        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null
                    && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }

        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);

        ngrams = filter.getFilteredNgrams(ngrams, lastToken);

        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }

    return queue.getOrderedList(start);

}

From source file:org.zenoss.zep.index.impl.lucene.LuceneEventIndexBackend.java

License:Open Source License

protected void searchEventTagSeverities(EventFilter filter, EventTagSeverityCounter counter)
        throws ZepException {
    final boolean hasTagsFilter = filter.getTagFilterCount() > 0;
    IndexSearcher searcher = null;
    try {
        searcher = getSearcher();
        final Query query = buildQueryFromFilter(searcher.getIndexReader(), filter);
        final OpenBitSet docs = new OpenBitSet(searcher.getIndexReader().maxDoc());
        searcher.search(query, new Collector() {
            private int docBase;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
            }

            @Override
            public void collect(int doc) throws IOException {
                docs.set(docBase + doc);
            }

            @Override
            public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
                this.docBase = atomicReaderContext.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });
        int docId;
        final DocIdSetIterator it = docs.iterator();
        while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            final EventSummary summary;
            if (this.archive) {
                // TODO: This isn't very cheap - would be better to batch by UUID in separate calls
                // This doesn't get called on the event archive right now, so leave it until need to optimize.
                Document doc = searcher.doc(docId, UUID_FIELDS);
                summary = this.eventSummaryBaseDao.findByUuid(doc.get(FIELD_UUID));
            } else {
                Document doc = searcher.doc(docId);
                // this is an optimization for getting the non-archived tags from an organizer for ticket
                // see ZEN-7239. For this ticket we updated the index to store what we needed for generating the
                // tags severities. Since we do not want a migrate of completely deleting the index this
                // method is backwards compatible by uncompressing the protobuf
                if (doc.get(FIELD_SEVERITY) != null) {
                    int count = Integer.parseInt(doc.get(FIELD_COUNT));
                    boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED
                            .equals(EventStatus.valueOf(Integer.parseInt(doc.get(FIELD_STATUS))));
                    EventSeverity severity = EventSeverity.valueOf(Integer.parseInt(doc.get(FIELD_SEVERITY)));

                    // get the map for each filter and update the count
                    for (String tag : doc.getValues(FIELD_TAGS))
                        counter.update(tag, severity, count, acknowledged);
                    continue;
                } else {
                    summary = LuceneEventIndexMapper.toEventSummary(doc);
                }
            }
            boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED == summary.getStatus();
            Event occurrence = summary.getOccurrence(0);
            EventSeverity severity = occurrence.getSeverity();
            int count = occurrence.getCount();
            EventActor actor = occurrence.getActor();

            // Build tags from element_uuids - no tags specified in filter
            if (!hasTagsFilter) {
                if (actor.hasElementUuid())
                    counter.update(actor.getElementUuid(), severity, count, acknowledged);
            }
            // Build tag severities from passed in filter
            else {
                for (String uuid : Arrays.asList(actor.getElementUuid(), actor.getElementSubUuid()))
                    counter.update(uuid, severity, count, acknowledged);
                for (EventTag tag : occurrence.getTagsList())
                    for (String tagUuid : tag.getUuidList())
                        counter.update(tagUuid, severity, count, acknowledged);
            }
        }
    } catch (IOException e) {
        throw new ZepException(e);
    } catch (OutOfMemoryError e) {
        closeSearcherManager();
        throw e;
    } finally {
        returnSearcher(searcher);
    }
}

From source file:perf.DiskUsage.java

License:Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }

        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}