Usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
Field declaration: public static final int NO_MORE_DOCS
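NO_MORE_DOCS is the sentinel value (Integer.MAX_VALUE) that nextDoc() and advance() return once a DocIdSetIterator is exhausted, and it is what the examples below compare against. As a minimal sketch of the standard idiom (the countDocs method and its iterator parameter are illustrative, not taken from any of the examples):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Hypothetical helper: exhaust any DocIdSetIterator, stopping when nextDoc()
// returns the NO_MORE_DOCS sentinel (Integer.MAX_VALUE).
static int countDocs(DocIdSetIterator iterator) throws IOException {
    int count = 0;
    int doc;
    while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        count++; // "doc" holds the current document id here
    }
    // After the loop, iterator.docID() also reports NO_MORE_DOCS.
    return count;
}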
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java
License:Apache License
@Override
public int nextBucket() throws IOException {
    if (currentDoc < 0) {
        // Not nexted yet, no bucket
        return -1;
    }
    if (currentDoc == DocIdSetIterator.NO_MORE_DOCS || source.startPosition() == Spans.NO_MORE_POSITIONS)
        return NO_MORE_BUCKETS;
    return gatherHitsInternal();
}
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java
License:Apache License
@Override
public int advance(int target) throws IOException {
    bucketSize = -1; // not at a valid bucket anymore
    if (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
        if (currentDoc >= target)
            nextDoc();
        else {
            currentDoc = source.advance(target);
            if (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
                source.nextStartPosition(); // start gathering at the first hit
                //gatherHitsInternal();
            }
        }
    }
    return currentDoc;
}
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java
License:Apache License
@Override
public int advance(int target) throws IOException {
    if (currentDoc >= target) {
        return nextDoc();
    }
    if (currentDoc == NO_MORE_DOCS)
        return DocIdSetIterator.NO_MORE_DOCS;
    if (currentDoc < target) {
        currentDoc = source.advance(target);
        currentSpansStart = source.nextStartPosition();
        currentBucketStart = -1; // no bucket yet
    }
    return currentDoc;
}
From source file:nl.inl.blacklab.search.lucene.TestSpansInBuckets.java
License:Apache License
@Test
public void testSkipToPastEnd() throws IOException {
    Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, hpd.advance(6));
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Get character positions from word positions.
 *
 * Places character positions in the same arrays as the word positions were specified in.
 *
 * @param doc
 *            the document from which to find character positions
 * @param fieldName
 *            the field from which to find character positions
 * @param startsOfWords
 *            word positions for which we want starting character positions (i.e. the position
 *            of the first letter of that word)
 * @param endsOfWords
 *            word positions for which we want ending character positions (i.e. the position of
 *            the last letter of that word)
 * @param fillInDefaultsIfNotFound
 *            if true, if any illegal word positions are specified (say, past the end of the
 *            document), a sane default value is chosen (in this case, the last character of the
 *            last word found). Otherwise, throws an exception.
 */
void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords,
        boolean fillInDefaultsIfNotFound) {
    if (startsOfWords.length == 0)
        return; // nothing to do
    try {
        // Determine lowest and highest word position we'd like to know something about.
        // This saves a little bit of time for large result sets.
        int minP = -1, maxP = -1;
        int numStarts = startsOfWords.length;
        int numEnds = endsOfWords.length;
        for (int i = 0; i < numStarts; i++) {
            if (startsOfWords[i] < minP || minP == -1)
                minP = startsOfWords[i];
            if (startsOfWords[i] > maxP)
                maxP = startsOfWords[i];
        }
        for (int i = 0; i < numEnds; i++) {
            if (endsOfWords[i] < minP || minP == -1)
                minP = endsOfWords[i];
            if (endsOfWords[i] > maxP)
                maxP = endsOfWords[i];
        }
        if (minP < 0 || maxP < 0)
            throw new RuntimeException("Can't determine min and max positions");

        String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName);
        org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName);
        if (terms == null)
            throw new RuntimeException("Field " + fieldPropName + " in doc " + doc + " has no term vector");
        if (!terms.hasPositions())
            throw new RuntimeException(
                    "Field " + fieldPropName + " in doc " + doc + " has no character postion information");

        //int lowestPos = -1, highestPos = -1;
        int lowestPosFirstChar = -1, highestPosLastChar = -1;
        int total = numStarts + numEnds;
        boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes!
        int found = 0;

        // Iterate over terms
        TermsEnum termsEnum = terms.iterator(null);
        while (termsEnum.next() != null) {
            DocsAndPositionsEnum dpe = termsEnum.docsAndPositions(null, null);

            // Iterate over docs containing this term (NOTE: should be only one doc!)
            while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                int position = -1;

                // Iterate over positions of this term in this doc
                int positionsRead = 0;
                int numberOfPositions = dpe.freq();
                while (positionsRead < numberOfPositions) {
                    position = dpe.nextPosition();
                    if (position == -1)
                        break;
                    positionsRead++;

                    // Keep track of the lowest and highest char pos, so
                    // we can fill in the character positions we didn't find
                    int startOffset = dpe.startOffset();
                    if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) {
                        lowestPosFirstChar = startOffset;
                    }
                    int endOffset = dpe.endOffset();
                    if (endOffset > highestPosLastChar) {
                        highestPosLastChar = endOffset;
                    }

                    // We've calculated the min and max word positions in advance, so
                    // we know we can skip this position if it's outside the range we're interested in.
                    // (Saves a little time for large result sets)
                    if (position < minP || position > maxP) {
                        continue;
                    }
                    for (int m = 0; m < numStarts; m++) {
                        if (!done[m] && position == startsOfWords[m]) {
                            done[m] = true;
                            startsOfWords[m] = startOffset;
                            found++;
                        }
                    }
                    for (int m = 0; m < numEnds; m++) {
                        if (!done[numStarts + m] && position == endsOfWords[m]) {
                            done[numStarts + m] = true;
                            endsOfWords[m] = endOffset;
                            found++;
                        }
                    }
                    // NOTE: we might be tempted to break here if found == total,
                    // but that would foul up our calculation of highestPosLastChar and
                    // lowestPosFirstChar.
                }
            }
        }
        if (found < total) {
            if (!fillInDefaultsIfNotFound)
                throw new RuntimeException("Could not find all character offsets!");
            if (lowestPosFirstChar < 0 || highestPosLastChar < 0)
                throw new RuntimeException("Could not find default char positions!");
            for (int m = 0; m < numStarts; m++) {
                if (!done[m])
                    startsOfWords[m] = lowestPosFirstChar;
            }
            for (int m = 0; m < numEnds; m++) {
                if (!done[numStarts + m])
                    endsOfWords[m] = highestPosLastChar;
            }
        }
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Deletes documents matching a query from the BlackLab index.
 *
 * This deletes the documents from the Lucene index, the forward indices and the content store(s).
 *
 * @param q the query
 */
public void delete(Query q) {
    if (!indexMode)
        throw new RuntimeException("Cannot delete documents, not in index mode");
    try {
        // Open a fresh reader to execute the query
        DirectoryReader reader = DirectoryReader.open(indexWriter, false);
        try {
            // Execute the query, iterate over the docs and delete from FI and CS.
            IndexSearcher s = new IndexSearcher(reader);
            Weight w = s.createNormalizedWeight(q);
            AtomicReader scrw = new SlowCompositeReaderWrapper(reader);
            try {
                Scorer sc = w.scorer(scrw.getContext(), true, false, MultiFields.getLiveDocs(reader));
                if (sc == null)
                    return; // no matching documents

                // Iterate over matching docs
                while (true) {
                    int docId;
                    try {
                        docId = sc.nextDoc();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    if (docId == DocIdSetIterator.NO_MORE_DOCS)
                        break;
                    Document d = reader.document(docId);

                    // Delete this document in all forward indices
                    for (Map.Entry<String, ForwardIndex> e : forwardIndices.entrySet()) {
                        String fieldName = e.getKey();
                        ForwardIndex fi = e.getValue();
                        int fiid = Integer.parseInt(d.get(ComplexFieldUtil.forwardIndexIdField(fieldName)));
                        fi.deleteDocument(fiid);
                    }

                    // Delete this document in all content stores
                    for (Map.Entry<String, ContentAccessor> e : contentAccessors.entrySet()) {
                        String fieldName = e.getKey();
                        ContentAccessor ca = e.getValue();
                        if (!(ca instanceof ContentAccessorContentStore))
                            continue; // can only delete from content store
                        ContentStore cs = ((ContentAccessorContentStore) ca).getContentStore();
                        int cid = Integer.parseInt(d.get(ComplexFieldUtil.contentIdField((fieldName))));
                        cs.delete(cid);
                    }
                }
            } finally {
                scrw.close();
            }
        } finally {
            reader.close();
        }
        // Finally, delete the documents from the Lucene index
        indexWriter.deleteDocuments(q);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Determine the term frequencies in a set of documents (defined by the filter query)
 *
 * @param documentFilterQuery what set of documents to get the term frequencies for
 * @param fieldName complex field name, i.e. contents
 * @param propName property name, i.e. word, lemma, pos, etc.
 * @param altName alternative name, i.e. s, i (case-sensitivity)
 * @return the term frequency map
 */
public Map<String, Integer> termFrequencies(Query documentFilterQuery, String fieldName, String propName,
        String altName) {
    try {
        String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
        Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery);
        Map<String, Integer> freq = new HashMap<String, Integer>();
        for (AtomicReaderContext arc : reader.leaves()) {
            if (weight == null)
                throw new RuntimeException("weight == null");
            if (arc == null)
                throw new RuntimeException("arc == null");
            if (arc.reader() == null)
                throw new RuntimeException("arc.reader() == null");
            Scorer scorer = weight.scorer(arc, true, false, arc.reader().getLiveDocs());
            if (scorer != null) {
                while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    LuceneUtil.getFrequenciesFromTermVector(reader, scorer.docID() + arc.docBase, luceneField,
                            freq);
                }
            }
        }
        return freq;
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.SearcherImpl.java
License:Apache License
@Override
public void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords,
        boolean fillInDefaultsIfNotFound) {
    if (startsOfWords.length == 0)
        return; // nothing to do
    try {
        // Determine lowest and highest word position we'd like to know something about.
        // This saves a little bit of time for large result sets.
        int minP = -1, maxP = -1;
        int numStarts = startsOfWords.length;
        int numEnds = endsOfWords.length;
        for (int i = 0; i < numStarts; i++) {
            if (startsOfWords[i] < minP || minP == -1)
                minP = startsOfWords[i];
            if (startsOfWords[i] > maxP)
                maxP = startsOfWords[i];
        }
        for (int i = 0; i < numEnds; i++) {
            if (endsOfWords[i] < minP || minP == -1)
                minP = endsOfWords[i];
            if (endsOfWords[i] > maxP)
                maxP = endsOfWords[i];
        }
        if (minP < 0 || maxP < 0)
            throw new RuntimeException("Can't determine min and max positions");

        String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName);
        org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName);
        if (terms == null)
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no term vector");
        if (!terms.hasPositions())
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no character postion information");

        //int lowestPos = -1, highestPos = -1;
        int lowestPosFirstChar = -1, highestPosLastChar = -1;
        int total = numStarts + numEnds;
        boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes!
        int found = 0;

        // Iterate over terms
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            PostingsEnum dpe = termsEnum.postings(null, null, PostingsEnum.POSITIONS);

            // Iterate over docs containing this term (NOTE: should be only one doc!)
            while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

                // Iterate over positions of this term in this doc
                int positionsRead = 0;
                int numberOfPositions = dpe.freq();
                while (positionsRead < numberOfPositions) {
                    int position = dpe.nextPosition();
                    if (position == -1)
                        break;
                    positionsRead++;

                    // Keep track of the lowest and highest char pos, so
                    // we can fill in the character positions we didn't find
                    int startOffset = dpe.startOffset();
                    if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) {
                        lowestPosFirstChar = startOffset;
                    }
                    int endOffset = dpe.endOffset();
                    if (endOffset > highestPosLastChar) {
                        highestPosLastChar = endOffset;
                    }

                    // We've calculated the min and max word positions in advance, so
                    // we know we can skip this position if it's outside the range we're interested in.
                    // (Saves a little time for large result sets)
                    if (position < minP || position > maxP) {
                        continue;
                    }
                    for (int m = 0; m < numStarts; m++) {
                        if (!done[m] && position == startsOfWords[m]) {
                            done[m] = true;
                            startsOfWords[m] = startOffset;
                            found++;
                        }
                    }
                    for (int m = 0; m < numEnds; m++) {
                        if (!done[numStarts + m] && position == endsOfWords[m]) {
                            done[numStarts + m] = true;
                            endsOfWords[m] = endOffset;
                            found++;
                        }
                    }
                    // NOTE: we might be tempted to break here if found == total,
                    // but that would foul up our calculation of highestPosLastChar and
                    // lowestPosFirstChar.
                }
            }
        }
        if (found < total) {
            if (!fillInDefaultsIfNotFound)
                throw new RuntimeException("Could not find all character offsets!");
            if (lowestPosFirstChar < 0 || highestPosLastChar < 0)
                throw new RuntimeException("Could not find default char positions!");
            for (int m = 0; m < numStarts; m++) {
                if (!done[m])
                    startsOfWords[m] = lowestPosFirstChar;
            }
            for (int m = 0; m < numEnds; m++) {
                if (!done[numStarts + m])
                    endsOfWords[m] = highestPosLastChar;
            }
        }
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.SearcherImpl.java
License:Apache License
@Override
public void delete(Query q) {
    if (!indexMode)
        throw new RuntimeException("Cannot delete documents, not in index mode");
    try {
        // Open a fresh reader to execute the query
        try (IndexReader freshReader = DirectoryReader.open(indexWriter, false)) {
            // Execute the query, iterate over the docs and delete from FI and CS.
            IndexSearcher s = new IndexSearcher(freshReader);
            Weight w = s.createNormalizedWeight(q, false);
            try (LeafReader scrw = SlowCompositeReaderWrapper.wrap(freshReader)) {
                Scorer sc = w.scorer(scrw.getContext(), MultiFields.getLiveDocs(freshReader));
                if (sc == null)
                    return; // no matching documents

                // Iterate over matching docs
                while (true) {
                    int docId;
                    try {
                        docId = sc.nextDoc();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    if (docId == DocIdSetIterator.NO_MORE_DOCS)
                        break;
                    Document d = freshReader.document(docId);

                    deleteFromForwardIndices(d);

                    // Delete this document in all content stores
                    contentStores.deleteDocument(d);
                }
            }
        } finally {
            reader.close();
        }
        // Finally, delete the documents from the Lucene index
        indexWriter.deleteDocuments(q);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:nl.inl.blacklab.TestUtil.java
License:Apache License
public static void assertEquals(Spans expected, Spans actual, boolean skipFirstNextDoc) throws IOException {
    int docNumber = 0, hitNumber;
    boolean firstDoc = true;
    while (true) {
        int actualDocId;
        if (firstDoc && skipFirstNextDoc) {
            // Actual Spans already skipped to document for testing. Don't .nextDoc() this time.
            firstDoc = false;
            actualDocId = actual.docID();
        } else {
            actualDocId = actual.nextDoc();
        }
        docNumber++;
        hitNumber = 0;
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.nextDoc(), actualDocId);
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.docID(), actual.docID());
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
        if (actualDocId == DocIdSetIterator.NO_MORE_DOCS)
            break;
        Assert.assertEquals(-1, actual.startPosition());
        Assert.assertEquals(-1, actual.endPosition());
        boolean first = true;
        while (true) {
            int actualStartPos = actual.nextStartPosition();
            if (first) {
                // .nextDoc() should always place us in a document with at least 1 hit
                first = false;
                Assert.assertFalse(actualStartPos == Spans.NO_MORE_POSITIONS);
            }
            hitNumber++;
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.nextStartPosition(),
                    actualStartPos);
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.startPosition(),
                    actual.startPosition());
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", actualStartPos,
                    actual.startPosition());
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", expected.endPosition(),
                    actual.endPosition());
            if (actualStartPos == Spans.NO_MORE_POSITIONS) {
                Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                        actual.nextStartPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                        actual.startPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", Spans.NO_MORE_POSITIONS,
                        actual.endPosition());
                break;
            }
        }
    }
}