List of usage examples for org.apache.lucene.search.spans Spans NO_MORE_POSITIONS
int NO_MORE_POSITIONS
To view the source code for org.apache.lucene.search.spans.Spans NO_MORE_POSITIONS, click the Source link.
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {//from w w w. j a va 2 s . c om logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s = 
new String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:nl.inl.blacklab.MockSpansInBuckets.java
License:Apache License
@Override public int endPosition(int i) { if (currentBucket >= bucketDoc.length) return Spans.NO_MORE_POSITIONS; if (alreadyAtFirstBucket) return -1; return end[bucketStart[currentBucket] + i]; }
From source file:nl.inl.blacklab.MockSpansInBuckets.java
License:Apache License
@Override public int startPosition(int i) { if (currentBucket >= bucketDoc.length) return Spans.NO_MORE_POSITIONS; if (alreadyAtFirstBucket) return -1; return start[bucketStart[currentBucket] + i]; }
From source file:nl.inl.blacklab.search.Hit.java
License:Apache License
/** * Retrieve a list of Hit objects from a Spans. * * @param spans/*ww w. j a v a 2s. c o m*/ * where to retrieve the hits * @return the list of hits * @deprecated use Hits class */ @Deprecated public static List<Hit> hitList(BLSpans spans) { List<Hit> result = new ArrayList<>(); try { while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { result.add(spans.getHit()); } } return result; } catch (IOException e) { throw new RuntimeException(e); } }
From source file:nl.inl.blacklab.search.HitsImpl.java
License:Apache License
/**
 * Ensure that we have read at least as many hits as specified in the parameter.
 *
 * Walks the source Spans (segment by segment when started from a SpanQuery),
 * appending hits to {@code hits} and updating the hit/doc counters, until the
 * requested number has been read, the spans are exhausted, or a configured
 * maximum is reached. Synchronized on {@code this} so only one thread fills the
 * hit list at a time.
 *
 * @param number the minimum number of hits that will have been read when this method
 *   returns (unless there are fewer hits than this); if negative, reads all hits
 * @throws InterruptedException if the thread was interrupted during this operation
 */
void ensureHitsRead(int number) throws InterruptedException {
    // Fast path: already fully read, or we already have enough hits.
    if (sourceSpansFullyRead || (number >= 0 && hits.size() >= number))
        return;
    synchronized (this) {
        boolean readAllHits = number < 0;
        try {
            int maxHitsToCount = settings.maxHitsToCount();
            int maxHitsToRetrieve = settings.maxHitsToRetrieve();
            while (readAllHits || hits.size() < number) {
                // Don't hog the CPU, don't take too long
                etiquette.behave();
                // Stop if we're at the maximum number of hits we want to count
                if (maxHitsToCount >= 0 && hitsCounted >= maxHitsToCount) {
                    maxHitsCounted = true;
                    break;
                }
                // Get the next hit from the spans, moving to the next
                // segment when necessary.
                while (true) {
                    while (currentSourceSpans == null) {
                        // Exhausted (or not started yet); get next segment spans.
                        if (spanQuery == null) {
                            // We started from a Spans, not a SpanQuery. We're done now.
                            // (only used in deprecated methods or while testing)
                            return;
                        }
                        atomicReaderContextIndex++;
                        // Past the last segment: everything has been read.
                        if (atomicReaderContexts != null
                                && atomicReaderContextIndex >= atomicReaderContexts.size()) {
                            sourceSpansFullyRead = true;
                            return;
                        }
                        if (atomicReaderContexts != null) {
                            // Get the atomic reader context and get the next Spans from it.
                            LeafReaderContext context = atomicReaderContexts.get(atomicReaderContextIndex);
                            currentDocBase = context.docBase;
                            Bits liveDocs = context.reader().getLiveDocs();
                            currentSourceSpans = BLSpansWrapper
                                    .optWrapSortUniq(spanQuery.getSpans(context, liveDocs, termContexts));
                        } else {
                            // TESTING: no reader contexts; single pseudo-segment.
                            currentDocBase = 0;
                            if (atomicReaderContextIndex > 0) {
                                sourceSpansFullyRead = true;
                                return;
                            }
                            currentSourceSpans = BLSpansWrapper
                                    .optWrapSortUniq(spanQuery.getSpans(null, null, termContexts));
                        }
                        if (currentSourceSpans != null) {
                            // Update the hit query context with our new spans,
                            // and notify the spans of the hit query context
                            // (TODO: figure out if we need to call setHitQueryContext()
                            // for each segment or not; if it's just about capture groups
                            // registering themselves, we only need that for the first Spans.
                            // But it's probably required for backreferences, etc. anyway,
                            // and there won't be that many segments, so it's probably ok)
                            hitQueryContext.setSpans(currentSourceSpans);
                            currentSourceSpans.setHitQueryContext(hitQueryContext); // let captured groups register themselves
                            if (capturedGroups == null && hitQueryContext.numberOfCapturedGroups() > 0) {
                                capturedGroups = new HashMap<>();
                            }
                            int doc = currentSourceSpans.nextDoc();
                            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                currentSourceSpans = null; // no matching docs in this segment, try next
                        }
                    }
                    // Advance to next hit
                    int start = currentSourceSpans.nextStartPosition();
                    if (start == Spans.NO_MORE_POSITIONS) {
                        int doc = currentSourceSpans.nextDoc();
                        if (doc != DocIdSetIterator.NO_MORE_DOCS) {
                            // Go to first hit in doc
                            start = currentSourceSpans.nextStartPosition();
                        } else {
                            // This one is exhausted; go to the next one.
                            currentSourceSpans = null;
                        }
                    }
                    if (currentSourceSpans != null) {
                        // We're at the next hit.
                        break;
                    }
                }
                // Count the hit and add it (unless we've reached the maximum number of hits we
                // want)
                hitsCounted++;
                // Translate the segment-local doc ID to an index-wide one.
                int hitDoc = currentSourceSpans.docID() + currentDocBase;
                if (hitDoc != previousHitDoc) {
                    docsCounted++;
                    if (!maxHitsRetrieved)
                        docsRetrieved++;
                    previousHitDoc = hitDoc;
                }
                maxHitsRetrieved = maxHitsToRetrieve >= 0 && hits.size() >= maxHitsToRetrieve;
                if (!maxHitsRetrieved) {
                    Hit hit = currentSourceSpans.getHit();
                    Hit offsetHit = new Hit(hit.doc + currentDocBase, hit.start, hit.end);
                    if (capturedGroups != null) {
                        Span[] groups = new Span[hitQueryContext.numberOfCapturedGroups()];
                        hitQueryContext.getCapturedGroups(groups);
                        capturedGroups.put(offsetHit, groups);
                    }
                    hits.add(offsetHit);
                }
            }
        } catch (InterruptedException e) {
            maxHitsRetrieved = maxHitsCounted = true; // we've stopped retrieving/counting
            throw e;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java
License:Apache License
@Override public int nextBucket() throws IOException { if (currentDoc < 0) { // Not nexted yet, no bucket return -1; }//from www. ja v a2s .co m if (currentDoc == DocIdSetIterator.NO_MORE_DOCS || source.startPosition() == Spans.NO_MORE_POSITIONS) return NO_MORE_BUCKETS; return gatherHitsInternal(); }
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsConsecutive.java
License:Apache License
@Override protected void gatherHits() throws IOException { int lastEnd = source.startPosition(); while (source.startPosition() == lastEnd) { addHitFromSource();//from ww w . jav a 2s .c o m lastEnd = source.endPosition(); if (source.nextStartPosition() == Spans.NO_MORE_POSITIONS) break; } }
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerDocument.java
License:Apache License
@Override protected void gatherHits() throws IOException { do {/* w w w . j a va2 s . co m*/ addHitFromSource(); } while (source.nextStartPosition() != Spans.NO_MORE_POSITIONS); }
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java
License:Apache License
@Override public int nextBucket() throws IOException { if (currentDoc < 0) { // Not nexted yet, no bucket return -1; }//from w ww . j av a 2s . co m if (currentSpansStart == Spans.NO_MORE_POSITIONS) return NO_MORE_BUCKETS; return gatherEndPointsAtStartPoint(); }
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java
License:Apache License
/** * Go to the next bucket at or beyond the specified start point. * * Always at least advances to the next bucket, even if we were * already at or beyond the specified target. * * @param targetPos the target start point * @return docID if we're at a valid bucket, or NO_MORE_BUCKETS if we're done. * @throws IOException/*from w w w . j a v a 2 s. co m*/ */ public int advanceBucket(int targetPos) throws IOException { if (currentSpansStart >= targetPos) return nextBucket(); currentSpansStart = source.advanceStartPosition(targetPos); if (currentSpansStart == Spans.NO_MORE_POSITIONS) return NO_MORE_BUCKETS; return gatherEndPointsAtStartPoint(); }