Example usage for org.apache.lucene.search.spans Spans NO_MORE_POSITIONS

List of usage examples for org.apache.lucene.search.spans Spans NO_MORE_POSITIONS

Introduction

In this page you can find the example usage for org.apache.lucene.search.spans Spans NO_MORE_POSITIONS.

Prototype

int NO_MORE_POSITIONS

To view the source code for org.apache.lucene.search.spans Spans NO_MORE_POSITIONS, click the Source Link below.

Click Source Link

Usage

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Searches the index for {@code term} as a span query and, for every match,
 * logs the span's start/end positions plus the nearby terms (taken from the
 * document's term vector) within a window of 3 positions on either side.
 *
 * @param term the term to look up in the "content" field
 */
private static void searchWithContext(String term) {
    logger.info("searchWithContext(" + term + ")");
    SpanQuery spanQuery = new SpanTermQuery(new Term("content", term));
    // try-with-resources: the original never closed the directory or reader.
    try (Directory indexDirectory = FSDirectory.open(
            Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader indexReader = DirectoryReader.open(indexDirectory)) {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        IndexReader reader = searcher.getIndexReader();
        Spans spans = spanQuery.createWeight(searcher, false)
                .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
        ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs;

        logger.info("hits :" + sc.length);

        if (null != spans) {
            for (int k = 0; k < sc.length; k++) {
                int docId = sc[k].doc;
                logger.info("docID: " + docId);
                int newDocID = spans.advance(docId);
                logger.info("newDocID: " + newDocID);
                // NOTE(review): newDocID may be NO_MORE_DOCS or differ from
                // docId; the positions below would then belong to another doc
                // than the term vectors fetched with docId — confirm intent.
                int nextSpan;
                while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) {
                    logger.info("nextSpan             : " + nextSpan);
                    logger.info("spans.startPosition(): " + spans.startPosition());
                    logger.info("spans.endPosition()  : " + spans.endPosition());
                    logger.info("spans.width()        : " + spans.width());

                    Fields fields = reader.getTermVectors(docId);
                    Terms terms = fields.terms("content");

                    TermsEnum termsEnum = terms.iterator();
                    BytesRef text;
                    PostingsEnum postingEnum = null;
                    // Context window: 3 positions before and after the span.
                    int start = spans.startPosition() - 3;
                    int end = spans.endPosition() + 3;
                    while ((text = termsEnum.next()) != null) {
                        // BytesRef holds UTF-8 bytes; utf8ToString() decodes them
                        // correctly. The original used new String(bytes, off, len),
                        // which applies the platform default charset and can
                        // mangle non-ASCII terms.
                        String s = text.utf8ToString();
                        postingEnum = termsEnum.postings(postingEnum);
                        if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            int i = 0;
                            int position;
                            while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) {
                                if (position >= start && position <= end) {
                                    logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset
                                            + " length: " + text.length);
                                }
                                i++;
                            }
                        }
                    }
                }
            }
        } else {
            logger.info("no " + term + " found!");
        }
    } catch (IOException e) {
        // Preserve the stack trace; the original logged only the message.
        logger.error(e.getMessage(), e);
    }
    logger.info("End.");
}

From source file:nl.inl.blacklab.MockSpansInBuckets.java

License:Apache License

/**
 * Returns the end position of hit {@code i} within the current bucket:
 * NO_MORE_POSITIONS once all buckets are exhausted, -1 while still
 * positioned before the first bucket.
 */
@Override
public int endPosition(int i) {
    if (currentBucket >= bucketDoc.length) {
        return Spans.NO_MORE_POSITIONS;
    }
    return alreadyAtFirstBucket ? -1 : end[bucketStart[currentBucket] + i];
}

From source file:nl.inl.blacklab.MockSpansInBuckets.java

License:Apache License

/**
 * Returns the start position of hit {@code i} within the current bucket:
 * NO_MORE_POSITIONS once all buckets are exhausted, -1 while still
 * positioned before the first bucket.
 */
@Override
public int startPosition(int i) {
    if (currentBucket >= bucketDoc.length) {
        return Spans.NO_MORE_POSITIONS;
    }
    return alreadyAtFirstBucket ? -1 : start[bucketStart[currentBucket] + i];
}

From source file:nl.inl.blacklab.search.Hit.java

License:Apache License

/**
 * Retrieve a list of Hit objects from a Spans.
 *
 * @param spans/*ww  w.  j  a  v a 2s.  c  o  m*/
 *            where to retrieve the hits
 * @return the list of hits
 * @deprecated use Hits class
 */
@Deprecated
public static List<Hit> hitList(BLSpans spans) {
    List<Hit> result = new ArrayList<>();
    try {
        while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                result.add(spans.getHit());
            }
        }
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:nl.inl.blacklab.search.HitsImpl.java

License:Apache License

/**
 * Ensure that we have read at least as many hits as specified in the parameter.
 *
 * @param number the minimum number of hits that will have been read when this method
 *   returns (unless there are fewer hits than this); if negative, reads all hits
 * @throws InterruptedException if the thread was interrupted during this operation
 */
void ensureHitsRead(int number) throws InterruptedException {
    // Fast path: everything already read, or enough hits buffered already.
    if (sourceSpansFullyRead || (number >= 0 && hits.size() >= number))
        return;

    synchronized (this) {
        boolean readAllHits = number < 0;
        try {
            int maxHitsToCount = settings.maxHitsToCount();
            int maxHitsToRetrieve = settings.maxHitsToRetrieve();
            while (readAllHits || hits.size() < number) {

                // Don't hog the CPU, don't take too long
                etiquette.behave();

                // Stop if we're at the maximum number of hits we want to count
                if (maxHitsToCount >= 0 && hitsCounted >= maxHitsToCount) {
                    maxHitsCounted = true;
                    break;
                }

                // Get the next hit from the spans, moving to the next
                // segment when necessary.
                while (true) {
                    while (currentSourceSpans == null) {
                        // Exhausted (or not started yet); get next segment spans.

                        if (spanQuery == null) {
                            // We started from a Spans, not a SpanQuery. We're done now.
                            // (only used in deprecated methods or while testing)
                            return;
                        }

                        atomicReaderContextIndex++;
                        if (atomicReaderContexts != null
                                && atomicReaderContextIndex >= atomicReaderContexts.size()) {
                            // No more segments; all hits have been read.
                            sourceSpansFullyRead = true;
                            return;
                        }
                        if (atomicReaderContexts != null) {
                            // Get the atomic reader context and get the next Spans from it.
                            LeafReaderContext context = atomicReaderContexts.get(atomicReaderContextIndex);
                            currentDocBase = context.docBase;
                            Bits liveDocs = context.reader().getLiveDocs();
                            currentSourceSpans = BLSpansWrapper
                                    .optWrapSortUniq(spanQuery.getSpans(context, liveDocs, termContexts));
                        } else {
                            // TESTING: no reader contexts; single pseudo-segment at docBase 0.
                            currentDocBase = 0;
                            if (atomicReaderContextIndex > 0) {
                                sourceSpansFullyRead = true;
                                return;
                            }
                            currentSourceSpans = BLSpansWrapper
                                    .optWrapSortUniq(spanQuery.getSpans(null, null, termContexts));
                        }

                        if (currentSourceSpans != null) {
                            // Update the hit query context with our new spans,
                            // and notify the spans of the hit query context
                            // (TODO: figure out if we need to call setHitQueryContext()
                            //    for each segment or not; if it's just about capture groups
                            //    registering themselves, we only need that for the first Spans.
                            //    But it's probably required for backreferences, etc. anyway,
                            //    and there won't be that many segments, so it's probably ok)
                            hitQueryContext.setSpans(currentSourceSpans);
                            currentSourceSpans.setHitQueryContext(hitQueryContext); // let captured groups register themselves
                            if (capturedGroups == null && hitQueryContext.numberOfCapturedGroups() > 0) {
                                // Lazily allocate storage for captured groups on first use.
                                capturedGroups = new HashMap<>();
                            }

                            int doc = currentSourceSpans.nextDoc();
                            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                currentSourceSpans = null; // no matching docs in this segment, try next
                        }
                    }

                    // Advance to next hit
                    int start = currentSourceSpans.nextStartPosition();
                    if (start == Spans.NO_MORE_POSITIONS) {
                        // Current doc exhausted; try the next doc in this segment.
                        int doc = currentSourceSpans.nextDoc();
                        if (doc != DocIdSetIterator.NO_MORE_DOCS) {
                            // Go to first hit in doc
                            start = currentSourceSpans.nextStartPosition();
                        } else {
                            // This one is exhausted; go to the next one.
                            currentSourceSpans = null;
                        }
                    }
                    if (currentSourceSpans != null) {
                        // We're at the next hit.
                        break;
                    }
                }

                // Count the hit and add it (unless we've reached the maximum number of hits we
                // want)
                hitsCounted++;
                // Segment-relative doc id + docBase = index-wide doc id.
                int hitDoc = currentSourceSpans.docID() + currentDocBase;
                if (hitDoc != previousHitDoc) {
                    docsCounted++;
                    if (!maxHitsRetrieved)
                        docsRetrieved++;
                    previousHitDoc = hitDoc;
                }
                maxHitsRetrieved = maxHitsToRetrieve >= 0 && hits.size() >= maxHitsToRetrieve;
                if (!maxHitsRetrieved) {
                    Hit hit = currentSourceSpans.getHit();
                    Hit offsetHit = new Hit(hit.doc + currentDocBase, hit.start, hit.end);
                    if (capturedGroups != null) {
                        Span[] groups = new Span[hitQueryContext.numberOfCapturedGroups()];
                        hitQueryContext.getCapturedGroups(groups);
                        capturedGroups.put(offsetHit, groups);
                    }
                    hits.add(offsetHit);
                }
            }
        } catch (InterruptedException e) {
            maxHitsRetrieved = maxHitsCounted = true; // we've stopped retrieving/counting
            throw e;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java

License:Apache License

/**
 * Moves to the next bucket of hits, if there is one.
 *
 * @return -1 if not nexted yet, NO_MORE_BUCKETS when exhausted,
 *         otherwise the result of gathering the next bucket
 * @throws IOException on error reading the underlying spans
 */
@Override
public int nextBucket() throws IOException {
    if (currentDoc < 0) {
        return -1; // not nexted yet, no bucket
    }
    boolean exhausted = currentDoc == DocIdSetIterator.NO_MORE_DOCS
            || source.startPosition() == Spans.NO_MORE_POSITIONS;
    return exhausted ? NO_MORE_BUCKETS : gatherHitsInternal();
}

From source file:nl.inl.blacklab.search.lucene.SpansInBucketsConsecutive.java

License:Apache License

/**
 * Collects a run of consecutive hits: keeps adding hits as long as each
 * one starts exactly where the previous hit ended.
 */
@Override
protected void gatherHits() throws IOException {
    int expectedStart = source.startPosition();
    boolean more = true;
    while (more && source.startPosition() == expectedStart) {
        addHitFromSource();
        // Read the end position BEFORE advancing the source spans.
        expectedStart = source.endPosition();
        more = source.nextStartPosition() != Spans.NO_MORE_POSITIONS;
    }
}

From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerDocument.java

License:Apache License

/**
 * Buckets every remaining hit in the current document. The source is
 * already positioned on the first hit, so we collect before advancing.
 */
@Override
protected void gatherHits() throws IOException {
    while (true) {
        addHitFromSource();
        if (source.nextStartPosition() == Spans.NO_MORE_POSITIONS) {
            break;
        }
    }
}

From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java

License:Apache License

/**
 * Moves to the next bucket of hits, if there is one.
 *
 * @return -1 if not nexted yet, NO_MORE_BUCKETS when the positions are
 *         exhausted, otherwise the result of gathering end points
 * @throws IOException on error reading the underlying spans
 */
@Override
public int nextBucket() throws IOException {
    if (currentDoc < 0) {
        return -1; // not nexted yet, no bucket
    }
    return currentSpansStart == Spans.NO_MORE_POSITIONS
            ? NO_MORE_BUCKETS
            : gatherEndPointsAtStartPoint();
}

From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java

License:Apache License

/**
 * Go to the next bucket at or beyond the specified start point.
 *
 * Always at least advances to the next bucket, even if we were
 * already at or beyond the specified target.
 *
 * @param targetPos the target start point
 * @return docID if we're at a valid bucket, or NO_MORE_BUCKETS if we're done.
 * @throws IOException/*from w  w  w .  j  a v a 2  s.  co  m*/
 */
public int advanceBucket(int targetPos) throws IOException {
    if (currentSpansStart >= targetPos)
        return nextBucket();
    currentSpansStart = source.advanceStartPosition(targetPos);
    if (currentSpansStart == Spans.NO_MORE_POSITIONS)
        return NO_MORE_BUCKETS;
    return gatherEndPointsAtStartPoint();
}