Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

On this page you can find example usages of org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS.

Prototype

int NO_MORE_DOCS

Document

When returned by #nextDoc(), #advance(int), and #docID(), it means there are no more docs in the iterator.
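
As a quick orientation before the collected examples, here is a minimal sketch of the standard consumption idiom; the method name and the counting step are placeholders rather than code from any of the projects listed below.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

// Exhaust an iterator by calling nextDoc() until the NO_MORE_DOCS sentinel
// (Integer.MAX_VALUE) is returned. advance(target) can be used instead of
// nextDoc() to jump to the first doc at or beyond target.
static int countDocs(DocIdSetIterator it) throws IOException {
    int count = 0;
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        count++; // process the current document; it.docID() == doc here
    }
    return count;
}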

Usage

From source file:org.sindice.siren.util.ScorerCellQueue.java

License:Apache License

public final boolean topSkipToAndAdjustElsePop(final int entity, final int tuple, final int cell)
        throws IOException {
    return this.checkAdjustElsePop(topHSC.scorer.advance(entity, tuple, cell) != DocIdSetIterator.NO_MORE_DOCS);
}

From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java

License:Apache License

static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocIdSetIterator filterItr,
        DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException {
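    // Advance the spans and the filter iterator roughly in lockstep; NO_MORE_DOCS from either one ends the crawl.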
    int filterDoc = -1;
    int spansDoc = spans.nextDoc();
    while (true) {
        if (spansDoc == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        }
        filterDoc = filterItr.advance(spansDoc);
        if (filterDoc == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        } else if (filterDoc > spansDoc) {
            while (spansDoc <= filterDoc) {
                spansDoc = spans.nextDoc();
                if (spansDoc == filterDoc) {
                    boolean cont = visit(leafCtx, spans, visitor);
                    if (!cont) {
                        return false;
                    }

                } else {
                    continue;
                }
            }
        } else if (filterDoc == spansDoc) {
            boolean cont = visit(leafCtx, spans, visitor);
            if (!cont) {
                return false;
            }
            //then iterate spans
            spansDoc = spans.nextDoc();
        } else if (filterDoc < spansDoc) {
            throw new IllegalArgumentException("FILTER doc is < spansdoc!!!");
        } else {
            throw new IllegalArgumentException("Something horrible happened");
        }
    }
    return true;
}

From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java

License:Apache License

static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor)
        throws IOException, TargetTokenNotFoundException {
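    // Visit every document the spans match; stop early if the visitor returns false.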
    while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        boolean cont = visit(leafCtx, spans, visitor);
        if (!cont) {
            return false;
        }
    }
    return true;
}

From source file:org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java

License:Apache License

private void executeNeedleTests(Analyzer analyzer) throws Exception {

    String needle = getNeedle(analyzer);
    int numFieldValues = 23;

    Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);

    IndexReader reader = DirectoryReader.open(directory);

    LeafReaderContext ctx = reader.leaves().get(0);
    LeafReader r = ctx.reader();

    PostingsEnum dpe = r.postings(new Term(FIELD, needle), PostingsEnum.ALL);
    int numTests = 0;
    try {
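        // Enumerate each document containing the needle term; the postings enumeration ends at NO_MORE_DOCS.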
        while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int frq = dpe.freq();
            int advanced = 0;

            String[] fieldValues = r.document(dpe.docID()).getValues(FIELD);
            while (++advanced < frq) {
                dpe.nextPosition();
                String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(dpe.startOffset(),
                        dpe.endOffset(), fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                numTests++;
            }
        }
    } finally {
        reader.close();
        directory.close();
    }
    assertEquals("number of tests", numFieldValues - 1, numTests);
}

From source file:org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java

License:Apache License

protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }

    int[] results = new int[ids.length];

    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of "id" terms from our reader contain the id value we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // the reader contains the term: get its postings and make sure they exist (null check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too..
                    if ((liveDocs == null || liveDocs.get(docID)) && docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }

    return results;
}

From source file:org.voyanttools.trombone.tool.corpus.DocumentNgrams.java

License:Open Source License

List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);

    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
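    // Walk the corpus's doc-id set one Lucene document at a time until it is exhausted.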
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];

        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper,
                luceneDoc, lastToken);

        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null
                    && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }

        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);

        ngrams = filter.getFilteredNgrams(ngrams, lastToken);

        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }

    return queue.getOrderedList(start);

}

From source file:org.zenoss.zep.index.impl.lucene.LuceneEventIndexBackend.java

License:Open Source License

protected void searchEventTagSeverities(EventFilter filter, EventTagSeverityCounter counter)
        throws ZepException {
    final boolean hasTagsFilter = filter.getTagFilterCount() > 0;
    IndexSearcher searcher = null;
    try {
        searcher = getSearcher();
        final Query query = buildQueryFromFilter(searcher.getIndexReader(), filter);
        final OpenBitSet docs = new OpenBitSet(searcher.getIndexReader().maxDoc());
        searcher.search(query, new Collector() {
            private int docBase;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
            }

            @Override
            public void collect(int doc) throws IOException {
                docs.set(docBase + doc);
            }

            @Override
            public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
                this.docBase = atomicReaderContext.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });
        int docId;
        final DocIdSetIterator it = docs.iterator();
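        // Iterate the matching docs collected into the bit set; nextDoc() returns NO_MORE_DOCS once they are exhausted.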
        while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            final EventSummary summary;
            if (this.archive) {
                // TODO: This isn't very cheap - would be better to batch by UUID in separate calls
                // This doesn't get called on the event archive right now, so leave it until need to optimize.
                Document doc = searcher.doc(docId, UUID_FIELDS);
                summary = this.eventSummaryBaseDao.findByUuid(doc.get(FIELD_UUID));
            } else {
                Document doc = searcher.doc(docId);
                // This is an optimization for getting the non-archived tags from an organizer; see ZEN-7239.
                // For that ticket the index was updated to store what is needed to generate the tag
                // severities. Since we do not want a migration that completely deletes the index, this
                // method stays backwards compatible by falling back to uncompressing the protobuf.
                if (doc.get(FIELD_SEVERITY) != null) {
                    int count = Integer.parseInt(doc.get(FIELD_COUNT));
                    boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED
                            .equals(EventStatus.valueOf(Integer.parseInt(doc.get(FIELD_STATUS))));
                    EventSeverity severity = EventSeverity.valueOf(Integer.parseInt(doc.get(FIELD_SEVERITY)));

                    // get the map for each filter and update the count
                    for (String tag : doc.getValues(FIELD_TAGS))
                        counter.update(tag, severity, count, acknowledged);
                    continue;
                } else {
                    summary = LuceneEventIndexMapper.toEventSummary(doc);
                }
            }
            boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED == summary.getStatus();
            Event occurrence = summary.getOccurrence(0);
            EventSeverity severity = occurrence.getSeverity();
            int count = occurrence.getCount();
            EventActor actor = occurrence.getActor();

            // Build tags from element_uuids - no tags specified in filter
            if (!hasTagsFilter) {
                if (actor.hasElementUuid())
                    counter.update(actor.getElementUuid(), severity, count, acknowledged);
            }
            // Build tag severities from passed in filter
            else {
                for (String uuid : Arrays.asList(actor.getElementUuid(), actor.getElementSubUuid()))
                    counter.update(uuid, severity, count, acknowledged);
                for (EventTag tag : occurrence.getTagsList())
                    for (String tagUuid : tag.getUuidList())
                        counter.update(tagUuid, severity, count, acknowledged);
            }
        }
    } catch (IOException e) {
        throw new ZepException(e);
    } catch (OutOfMemoryError e) {
        closeSearcherManager();
        throw e;
    } finally {
        returnSearcher(searcher);
    }
}

From source file:perf.DiskUsage.java

License:Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }

        if (docsWithField != null) {
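            // Count how many documents have a value for this field by exhausting the doc-values iterator.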
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if it's not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}

From source file:perf.PKLookupTask.java

License:Apache License

@Override
public void go(IndexState state) throws IOException {

    final IndexSearcher searcher = state.mgr.acquire();
    try {
        final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
        IndexState.PKLookupState[] pkStates = new IndexState.PKLookupState[subReaders.size()];
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            LeafReaderContext ctx = subReaders.get(subIDX);
            ThreadLocal<IndexState.PKLookupState> states = state.pkLookupStates
                    .get(ctx.reader().getCoreCacheKey());
            // NPE here means you are trying to use this task on a newly refreshed NRT reader!
            IndexState.PKLookupState pkState = states.get();
            if (pkState == null) {
                pkState = new IndexState.PKLookupState(ctx.reader(), "id");
                states.set(pkState);
            }
            pkStates[subIDX] = pkState;
        }

        for (int idx = 0; idx < ids.length; idx++) {
            int base = 0;
            final BytesRef id = ids[idx];
            for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
                IndexState.PKLookupState pkState = pkStates[subIDX];
                //System.out.println("\nTASK: sub=" + sub);
                //System.out.println("TEST: lookup " + ids[idx].utf8ToString());
                if (pkState.termsEnum.seekExact(id)) {
                    //System.out.println("  found!");
                    PostingsEnum docs = pkState.termsEnum.postings(pkState.postingsEnum, 0);
                    assert docs != null;
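                    // NO_MORE_DOCS doubles as a "not found" marker: docID keeps the sentinel unless a live doc is seen.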
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docs.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docs.nextDoc()) {
                        if (pkState.liveDocs == null || pkState.liveDocs.get(d)) {
                            docID = d;
                            break;
                        }
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        answers[idx] = base + docID;
                        break;
                    }
                }
                base += subReaders.get(subIDX).reader().maxDoc();
            }
        }
    } finally {
        state.mgr.release(searcher);
    }
}

From source file:proj.zoie.api.impl.util.ArrayDocIdSet.java

License:Apache License

@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
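        // Iterator backed by a sorted int[] of doc ids; nextDoc() and advance() both return NO_MORE_DOCS when the array is exhausted.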
        int doc = -1;
        int current = -1;
        int largest = _lengthminusone;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < _lengthminusone) {
                current++;
                doc = _docids[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = current < 0 ? binarySearch(_docids, target)
                    : binarySearch(_docids, target, current, largest);
            //  int idx = Arrays.binarySearch(_docids,target);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= _docids.length)
                    return DocIdSetIterator.NO_MORE_DOCS;
            }
            current = idx;
            doc = _docids[current];
            return doc;
        }
    };
}