List of usage examples for org.apache.lucene.search.spans Spans docID
public abstract int docID();
Returns the current document ID, or -1 if #nextDoc() or #advance(int) have not been called yet. From source file: nl.inl.blacklab.TestUtil.java
License:Apache License
public static void assertEquals(Spans expected, Spans actual, boolean skipFirstNextDoc) throws IOException { int docNumber = 0, hitNumber; boolean firstDoc = true; while (true) { int actualDocId; if (firstDoc && skipFirstNextDoc) { // Actual Spans already skipped to document for testing. Don't .nextDoc() this time. firstDoc = false;/*from w w w .ja va 2s.co m*/ actualDocId = actual.docID(); } else { actualDocId = actual.nextDoc(); } docNumber++; hitNumber = 0; Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.nextDoc(), actualDocId); Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.docID(), actual.docID()); Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID()); if (actualDocId == DocIdSetIterator.NO_MORE_DOCS) break; Assert.assertEquals(-1, actual.startPosition()); Assert.assertEquals(-1, actual.endPosition()); boolean first = true; while (true) { int actualStartPos = actual.nextStartPosition(); if (first) { // .nextDoc() should always place us in a document with at least 1 hit first = false; Assert.assertFalse(actualStartPos == Spans.NO_MORE_POSITIONS); } hitNumber++; Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.nextStartPosition(), actualStartPos); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.startPosition(), actual.startPosition()); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", actualStartPos, actual.startPosition()); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", expected.endPosition(), actual.endPosition()); if (actualStartPos == Spans.NO_MORE_POSITIONS) { Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID()); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS, actual.nextStartPosition()); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS, 
actual.startPosition()); Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", Spans.NO_MORE_POSITIONS, actual.endPosition()); break; } } } }
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
long countDocs(String field, Query q) throws Exception { BitSet docs = new BitSet(); List<LeafReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); LeafReaderContext leafReaderContext = ctxs.get(0); SpanQuery sq = convert(field, q);/*from ww w.j a v a 2s . co m*/ sq = (SpanQuery) sq.rewrite(reader); SpanWeight sw = sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); final Spans spans = sw.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { docs.set(spans.docID()); } } } long spanDocHits = docs.cardinality(); // double check with a regular searcher and original query TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(q, coll); assertEquals(coll.getTotalHits(), spanDocHits); return spanDocHits; }
From source file:org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java
License:Apache License
private void testOffsetForSingleSpanMatch(SpanOnlyParser p, String s, int trueDocID, int trueSpanStart, int trueSpanEnd) throws Exception { SpanQuery sq = (SpanQuery) p.parse(s); List<LeafReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); LeafReaderContext ctx = ctxs.get(0); sq = (SpanQuery) sq.rewrite(ctx.reader()); SpanWeight sw = sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); final Spans spans = sw.getSpans(ctx, SpanWeight.Postings.POSITIONS); int i = 0;// w w w . j a v a 2s . c o m int spanStart = -1; int spanEnd = -1; int docID = -1; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { spanStart = spans.startPosition(); spanEnd = spans.endPosition(); docID = spans.docID(); i++; } } assertEquals("should only be one matching span", 1, i); assertEquals("doc id", trueDocID, docID); assertEquals("span start", trueSpanStart, spanStart); assertEquals("span end", trueSpanEnd, spanEnd); }
From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License:Apache License
/**
 * Loads the stored fields of the document the Spans is currently positioned
 * on, collects every start/end position pair for that document into the
 * visitor's DocTokenOffsets, and hands the result to the visitor.
 *
 * @param leafCtx leaf context the Spans belongs to (supplies reader and docBase)
 * @param spans spans already positioned on a matching document
 * @param visitor receives the populated offsets for this document
 * @return whatever the visitor returns from visit()
 * @throws IOException on index access failure
 * @throws TargetTokenNotFoundException propagated from the visitor
 */
static boolean visit(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor)
        throws IOException, TargetTokenNotFoundException {
    int segmentDocId = spans.docID();
    Document storedDoc = leafCtx.reader().document(segmentDocId, visitor.getFields());
    DocTokenOffsets tokenOffsets = visitor.getDocTokenOffsets();
    tokenOffsets.reset(leafCtx.docBase, segmentDocId, storedDoc);
    // Drain every match position within the current document.
    for (int start = spans.nextStartPosition(); start != Spans.NO_MORE_POSITIONS;
            start = spans.nextStartPosition()) {
        tokenOffsets.addOffset(start, spans.endPosition());
    }
    return visitor.visit(tokenOffsets);
}
From source file:org.voyanttools.trombone.lucene.search.SpanQueryParserTest.java
License:Open Source License
@Test public void test() throws IOException { // File storageDirectory = TestHelper.getTemporaryTestStorageDirectory(); // Storage storage = new FileStorage(storageDirectory); Storage storage = new MemoryStorage(); Document document;/*from w w w . j a va 2 s.com*/ LuceneManager luceneManager = storage.getLuceneManager(); Bits bits = new Bits.MatchAllBits(2); Map<Term, TermContext> termsMap = new HashMap<Term, TermContext>(); document = new Document(); document.add(new TextField("lexical", "It was a dark and stormy night.", Field.Store.YES)); luceneManager.addDocument(document); document = new Document(); document.add( new TextField("lexical", "It was the best of times it was the worst of times.", Field.Store.YES)); luceneManager.addDocument(document); LeafReader atomicReader = SlowCompositeReaderWrapper.wrap(storage.getLuceneManager().getDirectoryReader()); IndexSearcher indexSearcher = new IndexSearcher(atomicReader); SpanQueryParser spanQueryParser = new SpanQueryParser(atomicReader, storage.getLuceneManager().getAnalyzer()); Map<String, SpanQuery> queriesMap; SpanQuery query; SpanWeight weight; Spans spans; // single term queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); spans.nextDoc(); assertEquals(0, spans.docID()); spans.nextStartPosition(); assertEquals(3, spans.startPosition()); assertEquals(spans.nextStartPosition(), Spans.NO_MORE_POSITIONS); assertEquals(spans.nextDoc(), Spans.NO_MORE_DOCS); // single term with case (this gets converted to lower case) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "It" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("It"); weight = query.createWeight(indexSearcher, false); spans = 
weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(6, spans.nextStartPosition()); // single term (ignore quotes) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark\"" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark", "best" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark;best" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = 
weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed), with spaces queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { " dark ; best " }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // comma-separated terms (collapased) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark,best" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark,best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); 
assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*,b*t" }, TokenType.lexical, true); // dark and best assertEquals(1, queriesMap.size()); query = queriesMap.get("dar*,b*t"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate wildcards (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*;bes*" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dar*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("bes*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark and" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = 
queriesMap.get("dark and"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(5, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "it was" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("it was"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(6, spans.nextStartPosition()); // phrase with wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar* an*" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dar* an*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(5, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase with wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark stormy~2" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark stormy~2"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(6, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); 
assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase with wildcards (ignored quotes) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark stormy\"~2" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark stormy~2"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(6, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); storage.destroy(); }
From source file:uk.co.flax.luwak.util.SpanExtractor.java
License:Apache License
/** * Collect all Spans extracted from a Scorer using a SpanCollector * @param scorer the scorer to extract Spans from * @param collector the SpanCollector/*from ww w . j a v a 2 s . co m*/ * @param errorOnNoSpans if true, throw an error if no Spans can be extracted * from the Scorer or any of its children * @throws IOException on error */ public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException { List<Spans> allSpans = getSpans(scorer, errorOnNoSpans); int doc = scorer.docID(); for (Spans spans : allSpans) { int spanDoc = spans.docID(); // if the Scorer advances lazily, then not all of its subspans may be on // the correct document if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { spans.collect(collector); } } } }