Example usage for org.apache.lucene.search.spans Spans docID

List of usage examples for org.apache.lucene.search.spans Spans docID

Introduction

On this page you can find an example usage for org.apache.lucene.search.spans Spans docID.

Prototype

public abstract int docID();

Source Link

Document

Returns the following:
  • -1 if #nextDoc() or #advance(int) were not called yet.

    Usage

    From source file: nl.inl.blacklab.TestUtil.java

    License: Apache License

    /**
     * Asserts that two Spans yield exactly the same matches: the same sequence
     * of documents, and within each document the same sequence of start/end
     * positions, including the NO_MORE_DOCS / NO_MORE_POSITIONS sentinels and
     * the -1 start/end positions before the first nextStartPosition() call.
     *
     * @param expected         Spans producing the expected hits
     * @param actual           Spans under test
     * @param skipFirstNextDoc if true, {@code actual} is assumed to already be
     *                         positioned on its first document, so nextDoc() is
     *                         skipped for it on the first iteration only
     * @throws IOException on index access error
     */
    public static void assertEquals(Spans expected, Spans actual, boolean skipFirstNextDoc) throws IOException {
        int docNumber = 0, hitNumber;
        boolean firstDoc = true;
        while (true) {
            int actualDocId;
            if (firstDoc && skipFirstNextDoc) {
                // Actual Spans already skipped to document for testing. Don't .nextDoc() this time.
                firstDoc = false;
                actualDocId = actual.docID();
            } else {
                actualDocId = actual.nextDoc();
            }
            docNumber++;
            hitNumber = 0;
            // nextDoc()'s return value and docID() must agree between the two Spans
            Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.nextDoc(), actualDocId);
            Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.docID(), actual.docID());
            Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
            if (actualDocId == DocIdSetIterator.NO_MORE_DOCS)
                break;
            // before any nextStartPosition() call, both positions must report -1
            Assert.assertEquals(-1, actual.startPosition());
            Assert.assertEquals(-1, actual.endPosition());
            boolean first = true;
            while (true) {
                int actualStartPos = actual.nextStartPosition();
                if (first) {
                    // .nextDoc() should always place us in a document with at least 1 hit
                    first = false;
                    Assert.assertFalse(actualStartPos == Spans.NO_MORE_POSITIONS);
                }
                hitNumber++;
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.nextStartPosition(),
                        actualStartPos);
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.startPosition(),
                        actual.startPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", actualStartPos,
                        actual.startPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", expected.endPosition(),
                        actual.endPosition());
                if (actualStartPos == Spans.NO_MORE_POSITIONS) {
                    // once positions are exhausted: docID must be unchanged, and all
                    // further position accessors must keep returning NO_MORE_POSITIONS
                    Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
                    Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                            actual.nextStartPosition());
                    Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                            actual.startPosition());
                    Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", Spans.NO_MORE_POSITIONS,
                            actual.endPosition());
                    break;
                }
            }
        }
    }
    

    From source file: org.tallison.lucene.queryparser.spans.SQPTestBase.java

    License: Apache License

    /**
     * Converts {@code q} to a SpanQuery, walks its Spans over the single index
     * segment, and returns the number of distinct documents containing at
     * least one match. The count is cross-checked against a regular search
     * for the original query.
     */
    long countDocs(String field, Query q) throws Exception {
        List<LeafReaderContext> leaves = reader.leaves();
        // this helper assumes the index consists of exactly one segment
        assert (leaves.size() == 1);
        LeafReaderContext leaf = leaves.get(0);
        SpanQuery spanQuery = convert(field, q);
        spanQuery = (SpanQuery) spanQuery.rewrite(reader);
        SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);

        BitSet matchedDocs = new BitSet();
        final Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);
        if (spans != null) {
            while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    matchedDocs.set(spans.docID());
                }
            }
        }
        long spanDocHits = matchedDocs.cardinality();
        // double check with a regular searcher and original query
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(q, collector);
        assertEquals(collector.getTotalHits(), spanDocHits);
        return spanDocHits;
    }
    

    From source file: org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java

    License: Apache License

    /**
     * Parses {@code s} with the given parser and asserts that the resulting
     * span query produces exactly one match, at the expected document id and
     * start/end positions.
     */
    private void testOffsetForSingleSpanMatch(SpanOnlyParser p, String s, int trueDocID, int trueSpanStart,
            int trueSpanEnd) throws Exception {
        SpanQuery spanQuery = (SpanQuery) p.parse(s);
        List<LeafReaderContext> leaves = reader.leaves();
        // the fixture indexes everything into a single segment
        assert (leaves.size() == 1);
        LeafReaderContext leaf = leaves.get(0);
        spanQuery = (SpanQuery) spanQuery.rewrite(leaf.reader());
        SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);

        final Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);

        int matchCount = 0;
        int foundStart = -1;
        int foundEnd = -1;
        int foundDocID = -1;

        // visit every document and every position, remembering the last match seen
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                foundStart = spans.startPosition();
                foundEnd = spans.endPosition();
                foundDocID = spans.docID();
                matchCount++;
            }
        }
        assertEquals("should only be one matching span", 1, matchCount);
        assertEquals("doc id", trueDocID, foundDocID);
        assertEquals("span start", trueSpanStart, foundStart);
        assertEquals("span end", trueSpanEnd, foundEnd);
    }
    

    From source file: org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java

    License: Apache License

    /**
     * Loads the stored fields for the Spans' current document, gathers every
     * start/end position pair from the Spans into the visitor's offsets
     * holder, and hands the result to the visitor.
     *
     * @return whatever the visitor's {@code visit} call returns
     */
    static boolean visit(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor)
            throws IOException, TargetTokenNotFoundException {
        int docId = spans.docID();
        Document storedDoc = leafCtx.reader().document(docId, visitor.getFields());
        DocTokenOffsets offsets = visitor.getDocTokenOffsets();
        offsets.reset(leafCtx.docBase, docId, storedDoc);
        // drain all positions of the current document into the offsets holder
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            offsets.addOffset(spans.startPosition(), spans.endPosition());
        }
        return visitor.visit(offsets);
    }
    

    From source file: org.voyanttools.trombone.lucene.search.SpanQueryParserTest.java

    License: Open Source License

    /**
     * End-to-end test of SpanQueryParser: indexes two small documents, then
     * verifies that term, multi-term, wildcard and phrase queries produce the
     * expected span positions. The repeated weight/spans/assert boilerplate is
     * factored into the private helpers below; the helpers also fix the
     * original's static-member-via-instance accesses
     * ({@code spans.NO_MORE_POSITIONS}) and its swapped expected/actual
     * argument order in a few assertEquals calls.
     */
    @Test
    public void test() throws IOException {

        Storage storage = new MemoryStorage();
        Document document;
        LuceneManager luceneManager = storage.getLuceneManager();

        // doc 0: "dark" is token 3, "and" token 4, "stormy" token 5
        document = new Document();
        document.add(new TextField("lexical", "It was a dark and stormy night.", Field.Store.YES));
        luceneManager.addDocument(document);
        // doc 1: "it was" occurs at tokens 0 and 6, "best" is token 3
        document = new Document();
        document.add(
                new TextField("lexical", "It was the best of times it was the worst of times.", Field.Store.YES));
        luceneManager.addDocument(document);

        LeafReader atomicReader = SlowCompositeReaderWrapper.wrap(storage.getLuceneManager().getDirectoryReader());
        IndexSearcher indexSearcher = new IndexSearcher(atomicReader);

        SpanQueryParser spanQueryParser = new SpanQueryParser(atomicReader,
                storage.getLuceneManager().getAnalyzer());

        Map<String, SpanQuery> queriesMap;

        // single term
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dark"), indexSearcher, atomicReader), 0, 3);

        // single term with case (this gets converted to lower case)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "It" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertItHits(getSpans(queriesMap.get("It"), indexSearcher, atomicReader));

        // single term (ignore quotes)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark\"" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dark"), indexSearcher, atomicReader), 0, 3);

        // two separate terms (not collapsed)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark", "best" }, TokenType.lexical, true);
        assertEquals(2, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dark"), indexSearcher, atomicReader), 0, 3);
        assertSingleHit(getSpans(queriesMap.get("best"), indexSearcher, atomicReader), 1, 3);

        // two separate terms (not collapsed), semicolon-separated
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark;best" }, TokenType.lexical, true);
        assertEquals(2, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dark"), indexSearcher, atomicReader), 0, 3);
        assertSingleHit(getSpans(queriesMap.get("best"), indexSearcher, atomicReader), 1, 3);

        // two separate terms (not collapsed), with spaces
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { " dark ; best " }, TokenType.lexical, true);
        assertEquals(2, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dark"), indexSearcher, atomicReader), 0, 3);
        assertSingleHit(getSpans(queriesMap.get("best"), indexSearcher, atomicReader), 1, 3);

        // comma-separated terms (collapsed into one query matching both docs)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark,best" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertHitInBothDocs(getSpans(queriesMap.get("dark,best"), indexSearcher, atomicReader), 3);

        // wildcards, collapsed ("dar*" matches dark, "b*t" matches best)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*,b*t" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertHitInBothDocs(getSpans(queriesMap.get("dar*,b*t"), indexSearcher, atomicReader), 3);

        // two separate wildcards (not collapsed)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*;bes*" }, TokenType.lexical, true);
        assertEquals(2, queriesMap.size());
        assertSingleHit(getSpans(queriesMap.get("dar*"), indexSearcher, atomicReader), 0, 3);
        assertSingleHit(getSpans(queriesMap.get("bes*"), indexSearcher, atomicReader), 1, 3);

        // phrase
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark and" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertSinglePhraseHit(getSpans(queriesMap.get("dark and"), indexSearcher, atomicReader), 0, 3, 5);

        // phrase occurring in both documents
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "it was" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertItHits(getSpans(queriesMap.get("it was"), indexSearcher, atomicReader));

        // phrase with wildcards
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar* an*" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertSinglePhraseHit(getSpans(queriesMap.get("dar* an*"), indexSearcher, atomicReader), 0, 3, 5);

        // phrase with slop
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark stormy~2" }, TokenType.lexical, true);
        assertEquals(1, queriesMap.size());
        assertSinglePhraseHit(getSpans(queriesMap.get("dark stormy~2"), indexSearcher, atomicReader), 0, 3, 6);

        // phrase with slop (quotes are ignored)
        queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark stormy\"~2" }, TokenType.lexical,
                true);
        assertEquals(1, queriesMap.size());
        assertSinglePhraseHit(getSpans(queriesMap.get("dark stormy~2"), indexSearcher, atomicReader), 0, 3, 6);

        storage.destroy();
    }

    /** Creates the weight for {@code query} and returns its Spans over the single leaf. */
    private static Spans getSpans(SpanQuery query, IndexSearcher searcher, LeafReader reader) throws IOException {
        SpanWeight weight = query.createWeight(searcher, false);
        return weight.getSpans(reader.getContext(), SpanWeight.Postings.POSITIONS);
    }

    /** Asserts exactly one match: document {@code doc}, start position {@code start}, then exhaustion. */
    private static void assertSingleHit(Spans spans, int doc, int start) throws IOException {
        assertEquals(doc, spans.nextDoc());
        assertEquals(start, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_DOCS, spans.nextDoc());
    }

    /** Asserts exactly one match with both a start and an end position, then exhaustion. */
    private static void assertSinglePhraseHit(Spans spans, int doc, int start, int end) throws IOException {
        assertEquals(doc, spans.nextDoc());
        assertEquals(start, spans.nextStartPosition());
        assertEquals(end, spans.endPosition());
        assertEquals(Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_DOCS, spans.nextDoc());
    }

    /** Asserts one match at {@code start} in each of the two documents, then exhaustion. */
    private static void assertHitInBothDocs(Spans spans, int start) throws IOException {
        assertEquals(0, spans.nextDoc());
        assertEquals(start, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
        assertEquals(1, spans.nextDoc());
        assertEquals(start, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_POSITIONS, spans.nextStartPosition());
        assertEquals(Spans.NO_MORE_DOCS, spans.nextDoc());
    }

    /** Asserts the "it"/"it was" matches: position 0 in doc 0; positions 0 and 6 in doc 1. */
    private static void assertItHits(Spans spans) throws IOException {
        assertEquals(0, spans.nextDoc());
        assertEquals(0, spans.nextStartPosition());
        assertEquals(1, spans.nextDoc());
        assertEquals(0, spans.nextStartPosition());
        assertEquals(6, spans.nextStartPosition());
    }
    

    From source file: uk.co.flax.luwak.util.SpanExtractor.java

    License: Apache License

    /**
     * Collect all Spans extracted from a Scorer using a SpanCollector
     * @param scorer the scorer to extract Spans from
     * @param collector the SpanCollector/*from   ww w  .  j a v  a 2  s . co m*/
     * @param errorOnNoSpans if true, throw an error if no Spans can be extracted
     *                       from the Scorer or any of its children
     * @throws IOException on error
     */
    public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException {
    
        List<Spans> allSpans = getSpans(scorer, errorOnNoSpans);
        int doc = scorer.docID();
    
        for (Spans spans : allSpans) {
            int spanDoc = spans.docID();
            // if the Scorer advances lazily, then not all of its subspans may be on
            // the correct document
            if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) {
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    spans.collect(collector);
                }
            }
        }
    
    }