List of usage examples for the org.apache.lucene.search.spans.Spans method advance(int)
public abstract int advance(int target) throws IOException;
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {/*from w w w . j a v a2 s . co m*/ logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s = 
new String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:nl.inl.blacklab.search.lucene.TestSpansTagsPayload.java
License:Apache License
/** Checks that after {@code advance(2)} only the hits of document 2 are produced. */
@Test
public void testSkip() throws IOException {
    // Input hits: two in document 1 followed by two in document 2.
    int[] sourceDocs = { 1, 1, 2, 2 };
    int[] sourceStarts = { 2, 4, 12, 14 };
    int[] sourceEnds = { 5, 7, 17, 15 };
    Spans source = MockSpans.withEndInPayload(sourceDocs, sourceStarts, sourceEnds);

    Spans spans = new SpansTagsPayload(source);
    spans.advance(2);

    // Only the document-2 hits should remain.
    Spans expected = new MockSpans(
            new int[] { 2, 2 },
            new int[] { 12, 14 },
            new int[] { 17, 15 });
    TestUtil.assertEquals(expected, spans, true);
}
From source file:uk.co.flax.luwak.util.SpanExtractor.java
License:Apache License
/** * Collect all Spans extracted from a Scorer using a SpanCollector * @param scorer the scorer to extract Spans from * @param collector the SpanCollector//from w w w . j a va2 s . c o m * @param errorOnNoSpans if true, throw an error if no Spans can be extracted * from the Scorer or any of its children * @throws IOException on error */ public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException { List<Spans> allSpans = getSpans(scorer, errorOnNoSpans); int doc = scorer.docID(); for (Spans spans : allSpans) { int spanDoc = spans.docID(); // if the Scorer advances lazily, then not all of its subspans may be on // the correct document if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { spans.collect(collector); } } } }