Example usage for org.apache.lucene.search.spans SpanWeight getSpans

List of usage examples for org.apache.lucene.search.spans SpanWeight getSpans

Introduction

In this page you can find the example usage for org.apache.lucene.search.spans SpanWeight getSpans.

Prototype

public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;

Source Link

Document

Expert: Return a Spans object iterating over matches from this Weight

Usage

From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java

License:Apache License

/**
 * Counts the total number of span matches produced by {@code q} against
 * {@code field} in the (single-segment) test index.
 *
 * @param field field to run the converted span query against
 * @param q query to convert to a SpanQuery and count matches for
 * @return total number of matching span positions across all documents
 * @throws Exception if conversion, rewriting, or span iteration fails
 */
long countSpans(String field, Query q) throws Exception {
    List<LeafReaderContext> leaves = reader.leaves();
    // test indexes are expected to consist of exactly one segment
    assert (leaves.size() == 1);
    LeafReaderContext leaf = leaves.get(0);

    SpanQuery spanQuery = convert(field, q);
    spanQuery = (SpanQuery) spanQuery.rewrite(reader);
    SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);

    long count = 0;
    if (spans == null) {
        // a null Spans means the (rewritten) query matches nothing in this leaf
        return count;
    }
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            count++;
        }
    }
    return count;
}

From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java

License:Apache License

/**
 * Counts the number of distinct documents containing at least one span
 * match for {@code q} against {@code field}, and cross-checks the result
 * against a regular search with the original query.
 *
 * @param field field to run the converted span query against
 * @param q query to convert to a SpanQuery and count document hits for
 * @return number of documents with at least one span match
 * @throws Exception if conversion, rewriting, or searching fails
 */
long countDocs(String field, Query q) throws Exception {
    BitSet docs = new BitSet();
    List<LeafReaderContext> ctxs = reader.leaves();
    // test indexes are expected to consist of exactly one segment
    assert (ctxs.size() == 1);
    LeafReaderContext leafReaderContext = ctxs.get(0);
    SpanQuery sq = convert(field, q);
    sq = (SpanQuery) sq.rewrite(reader);
    SpanWeight sw = sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);

    final Spans spans = sw.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
    if (spans != null) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            // One position is enough to confirm this doc matches; no need to
            // exhaust every position just to re-set the same bit repeatedly.
            if (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                docs.set(spans.docID());
            }
        }
    }
    long spanDocHits = docs.cardinality();
    // double check with a regular searcher and original query
    TotalHitCountCollector coll = new TotalHitCountCollector();
    searcher.search(q, coll);
    assertEquals(coll.getTotalHits(), spanDocHits);
    return spanDocHits;
}

From source file:org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java

License:Apache License

/**
 * Parses {@code s} with the given parser, iterates all resulting spans,
 * and asserts that exactly one span matched, at the expected document id
 * and start/end positions.
 *
 * @param p parser to parse the query string with
 * @param s query string to parse
 * @param trueDocID expected document id of the single match
 * @param trueSpanStart expected span start position
 * @param trueSpanEnd expected span end position
 * @throws Exception if parsing, rewriting, or span iteration fails
 */
private void testOffsetForSingleSpanMatch(SpanOnlyParser p, String s, int trueDocID, int trueSpanStart,
        int trueSpanEnd) throws Exception {
    List<LeafReaderContext> leaves = reader.leaves();
    // the test index must be a single segment
    assert (leaves.size() == 1);
    LeafReaderContext leaf = leaves.get(0);

    SpanQuery spanQuery = (SpanQuery) p.parse(s);
    spanQuery = (SpanQuery) spanQuery.rewrite(leaf.reader());
    SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);

    final Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);

    int matchCount = 0;
    int lastStart = -1;
    int lastEnd = -1;
    int lastDoc = -1;

    // record the last-seen match while counting all of them
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            lastStart = spans.startPosition();
            lastEnd = spans.endPosition();
            lastDoc = spans.docID();
            matchCount++;
        }
    }
    assertEquals("should only be one matching span", 1, matchCount);
    assertEquals("doc id", trueDocID, lastDoc);
    assertEquals("span start", trueSpanStart, lastStart);
    assertEquals("span end", trueSpanEnd, lastEnd);
}

From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java

License:Apache License

/**
 * Iterates every leaf of the searcher's index, materializes the spans for
 * {@code query} in each leaf, and hands them to {@code visitor}. If
 * {@code filter} is non-null, a scorer for the filter is built per leaf and
 * its iterator is passed alongside the spans so the visitor can intersect
 * the two. Visiting stops early as soon as a visit call returns false.
 *
 * @param query span query whose matches are crawled (rewritten here first)
 * @param filter optional query restricting which docs are visited; may be null
 * @param searcher searcher supplying the index reader and weights
 * @param visitor receives each leaf's spans (and filter iterator, if any)
 * @throws IOException on index access failure
 * @throws TargetTokenNotFoundException presumably thrown by the visitor
 *         when an expected token is missing -- declared but not thrown here
 */
public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor)
        throws IOException, TargetTokenNotFoundException {

    query = (SpanQuery) query.rewrite(searcher.getIndexReader());

    SpanWeight w = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    if (filter == null) {
        // unfiltered: visit spans of every leaf that has matches
        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {

            Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                // no matches in this leaf
                continue;
            }
            boolean cont = visitLeafReader(ctx, spans, visitor);
            if (!cont) {
                // visitor asked to stop crawling
                break;
            }
        }
    } else {
        filter = searcher.rewrite(filter);
        Weight searcherWeight = searcher.createWeight(filter, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
            Scorer leafReaderContextScorer = searcherWeight.scorer(ctx);
            if (leafReaderContextScorer == null) {
                // filter matches nothing in this leaf; skip it entirely
                continue;
            }
            //Can we tell from the scorer that there were no hits?
            //in <= 5.x we could stop here if the filter query had no hits.

            Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                continue;
            }
            DocIdSetIterator filterItr = leafReaderContextScorer.iterator();

            if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) {
                continue;
            }
            boolean cont = visitLeafReader(ctx, spans, filterItr, visitor);
            if (!cont) {
                // visitor asked to stop crawling
                break;
            }
        }
    }
}

From source file:org.voyanttools.trombone.lucene.CorpusMapper.java

License:Open Source License

/**
 * Builds a Spans for the given span query, restricted to the documents
 * whose bits are set in the supplied BitSet.
 *
 * @param spanQuery span query to evaluate
 * @param bitSet documents to keep
 * @return a filtered Spans, or null if the query matches nothing
 * @throws IOException on index access failure
 */
public Spans getFilteredSpans(SpanQuery spanQuery, BitSet bitSet) throws IOException {
    SpanWeight spanWeight = spanQuery.createWeight(getSearcher(), false);
    Spans rawSpans = spanWeight.getSpans(getLeafReader().getContext(), SpanWeight.Postings.POSITIONS);
    if (rawSpans == null) {
        return null;
    }
    return new DocumentFilterSpans(rawSpans, bitSet);
}

From source file:org.voyanttools.trombone.lucene.search.SpanQueryParserTest.java

License:Open Source License

/**
 * Exercises SpanQueryParser over a tiny two-document in-memory index,
 * checking the exact doc ids and span positions produced for single terms,
 * collapsed/uncollapsed multi-term input, wildcards, phrases, and slop.
 */
@Test
public void test() throws IOException {

    //      File storageDirectory = TestHelper.getTemporaryTestStorageDirectory();
    //      Storage storage = new FileStorage(storageDirectory);
    Storage storage = new MemoryStorage();
    Document document;
    LuceneManager luceneManager = storage.getLuceneManager();
    Bits bits = new Bits.MatchAllBits(2);
    Map<Term, TermContext> termsMap = new HashMap<Term, TermContext>();

    // doc 0: "It was a dark and stormy night."
    document = new Document();
    document.add(new TextField("lexical", "It was a dark and stormy night.", Field.Store.YES));
    luceneManager.addDocument(document);
    // doc 1: "It was the best of times it was the worst of times."
    document = new Document();
    document.add(
            new TextField("lexical", "It was the best of times it was the worst of times.", Field.Store.YES));
    luceneManager.addDocument(document);

    LeafReader atomicReader = SlowCompositeReaderWrapper.wrap(storage.getLuceneManager().getDirectoryReader());
    IndexSearcher indexSearcher = new IndexSearcher(atomicReader);

    SpanQueryParser spanQueryParser = new SpanQueryParser(atomicReader,
            storage.getLuceneManager().getAnalyzer());

    Map<String, SpanQuery> queriesMap;
    SpanQuery query;
    SpanWeight weight;
    Spans spans;

    // single term
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dark");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    spans.nextDoc();
    assertEquals(0, spans.docID());
    spans.nextStartPosition();
    assertEquals(3, spans.startPosition());
    assertEquals(spans.nextStartPosition(), Spans.NO_MORE_POSITIONS);
    assertEquals(spans.nextDoc(), Spans.NO_MORE_DOCS);

    // single term with case (this gets converted to lower case)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "It" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("It");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(0, spans.nextStartPosition());
    assertEquals(1, spans.nextDoc());
    assertEquals(0, spans.nextStartPosition());
    assertEquals(6, spans.nextStartPosition());

    // single term (ignore quotes)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark\"" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dark");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // two separate terms (not collapsed)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark", "best" }, TokenType.lexical, true);
    assertEquals(2, queriesMap.size());

    query = queriesMap.get("dark");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    query = queriesMap.get("best");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // two separate terms (not collapsed), semicolon-separated
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark;best" }, TokenType.lexical, true);
    assertEquals(2, queriesMap.size());

    query = queriesMap.get("dark");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    query = queriesMap.get("best");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // two separate terms (not collapsed), with spaces
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { " dark ; best " }, TokenType.lexical, true);
    assertEquals(2, queriesMap.size());

    query = queriesMap.get("dark");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    query = queriesMap.get("best");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // comma-separated terms (collapsed into a single query)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark,best" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());

    query = queriesMap.get("dark,best");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // wildcards
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*,b*t" }, TokenType.lexical, true); // dark and best
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dar*,b*t");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // two separate wildcards (not collapsed)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*;bes*" }, TokenType.lexical, true);
    assertEquals(2, queriesMap.size());

    query = queriesMap.get("dar*");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    query = queriesMap.get("bes*");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(1, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // phrase
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark and" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dark and");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(5, spans.endPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // phrase occurring in both documents
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "it was" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("it was");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(0, spans.nextStartPosition());
    assertEquals(1, spans.nextDoc());
    assertEquals(0, spans.nextStartPosition());
    assertEquals(6, spans.nextStartPosition());

    // phrase with wildcards
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar* an*" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dar* an*");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(5, spans.endPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // phrase with slop
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark stormy~2" }, TokenType.lexical, true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dark stormy~2");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(6, spans.endPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    // phrase with slop (ignored quotes)
    queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark stormy\"~2" }, TokenType.lexical,
            true);
    assertEquals(1, queriesMap.size());
    query = queriesMap.get("dark stormy~2");
    weight = query.createWeight(indexSearcher, false);
    spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS);
    assertEquals(0, spans.nextDoc());
    assertEquals(3, spans.nextStartPosition());
    assertEquals(6, spans.endPosition());
    assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition());
    assertEquals(spans.NO_MORE_DOCS, spans.nextDoc());

    storage.destroy();
}

From source file:tw.com.kyle.luminance.corpus.compute.CollocateFromIndex.java

/**
 * Computes frequency information for {@code ngram} in the "content" field:
 * a multi-character input is matched as an ordered near-query of its
 * single characters; a single character as a plain term query.
 *
 * @param ngram the character n-gram to look up
 * @return frequency info: matching fragments, matching docs in doc_set,
 *         and total token (span) occurrences
 * @throws IOException on index access failure
 */
private FreqInfo query_ngram(String ngram) throws IOException {
    final String FIELD = "content";
    SpanQuery sq;
    if (ngram.length() > 1) {
        // ordered near-query: one clause per character of the n-gram
        SpanNearQuery.Builder builder = new SpanNearQuery.Builder(FIELD, true);
        for (int i = 0; i < ngram.length(); ++i) {
            builder.addClause(new SpanTermQuery(new Term(FIELD, ngram.substring(i, i + 1))));
        }
        sq = builder.build();
    } else {
        sq = new SpanTermQuery(new Term(FIELD, ngram));
    }

    int all_frag_freq = 0;
    int all_tok_freq = 0;
    int doc_freq = 0;

    // The weight does not depend on the leaf; build it once instead of
    // rebuilding it on every iteration of the leaves loop.
    SpanWeight weights = sq.createWeight(searcher, false);
    if (weights != null) {
        for (LeafReaderContext ctx : reader.leaves()) {
            Spans spans = weights.getSpans(ctx, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                // System.out.printf("Nothing found for %s%n", ngram);
                continue;
            }
            int nxtDoc = 0;

            // NOTE(review): nxtDoc is leaf-relative; if the index ever has
            // more than one segment, doc_set.contains(nxtDoc) would need
            // ctx.docBase added -- confirm single-segment assumption.
            while ((nxtDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) {
                all_frag_freq += 1;
                if (doc_set.contains(nxtDoc)) {
                    doc_freq += 1;
                }
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    all_tok_freq += 1;
                }
            }
        }
    }

    // System.out.printf("Occurrence frag: %d, doc: %d(tok: %d)%n", all_frag_freq, discourse_freq, tok_freq);        
    FreqInfo freq_info = new FreqInfo();
    freq_info.nDocuments = doc_freq;
    freq_info.nFragments = all_frag_freq;
    freq_info.nTokens = all_tok_freq;
    return freq_info;
}

From source file:tw.com.kyle.luminance.LumQuery.java

/**
 * Finds character offsets for every span match of {@code term} in
 * {@code field}. With {@code useNearQuery}, the term is matched as an
 * ordered near-query of its single characters (one clause per character);
 * otherwise as a plain term query.
 *
 * @param term term to search for; empty string yields null
 * @param field field to search in
 * @param useNearQuery whether to split the term into per-character clauses
 * @return list of {docId, startOffset, endOffset} triples (offsets -1 if
 *         not collected); null for an empty term
 * @throws IOException on index access failure
 */
public List<Integer[]> query_for_offsets(String term, String field, boolean useNearQuery) throws IOException {
    if (term.length() == 0) {
        return null;
    }

    SpanQuery sq;
    if (!useNearQuery) {
        sq = new SpanTermQuery(new Term(field, term));
    } else {
        // ordered near-query: one clause per character of the term
        SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true);
        for (int i = 0; i < term.length(); ++i) {
            builder.addClause(new SpanTermQuery(new Term(field, term.substring(i, i + 1))));
        }
        sq = builder.build();
    }

    IndexSearcher searcher = new IndexSearcher(idx_reader);
    List<Integer[]> offs = new ArrayList<>();

    // The weight does not depend on the leaf; build it once instead of
    // rebuilding it on every iteration of the leaves loop.
    SpanWeight weights = sq.createWeight(searcher, false);
    if (weights == null) {
        return offs;
    }
    for (LeafReaderContext ctx : idx_reader.leaves()) {
        Spans spans = weights.getSpans(ctx, Postings.OFFSETS);
        if (spans == null) {
            System.out.printf("Nothing found for %s%n", term);
            continue;
        }

        // NOTE(review): doc_id is leaf-relative; with a multi-segment index
        // callers would need ctx.docBase added -- confirm intended.
        int nxtDoc = -1;
        while ((nxtDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) {
            final int doc_id = nxtDoc;
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                final int start_pos = spans.startPosition();
                final int end_pos = spans.endPosition();
                final Integer[] off_x = new Integer[] { doc_id, -1, -1 };
                spans.collect(new SpanCollector() {
                    @Override
                    public void collectLeaf(PostingsEnum pe, int i, Term term) throws IOException {
                        // record the char start of the first position and the
                        // char end of the last position of the span
                        if (i == start_pos) {
                            off_x[1] = pe.startOffset();
                        }
                        if (i + 1 == end_pos) {
                            off_x[2] = pe.endOffset();
                        }
                    }

                    @Override
                    public void reset() {
                        throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
                    }
                });
                offs.add(off_x);
            }

        }

    }

    return offs;
}