Example usage for org.apache.lucene.search.spans Spans collect

List of usage examples for org.apache.lucene.search.spans Spans collect

Introduction

On this page you can find example usages of org.apache.lucene.search.spans.Spans.collect.

Prototype

public abstract void collect(SpanCollector collector) throws IOException;

Source Link

Document

Collect postings data from the leaves of the current Spans.

Usage

From source file:tw.com.kyle.luminance.LumQuery.java

/**
 * Runs a span query for {@code term} against {@code field} and reports the
 * character offsets of every match.
 *
 * <p>When {@code useNearQuery} is false the whole {@code term} is looked up as a
 * single indexed term. When it is true, {@code term} is split into its individual
 * characters (by Unicode code point) and an ordered {@link SpanNearQuery} is built
 * from one {@link SpanTermQuery} per character.
 *
 * @param term         text to search for
 * @param field        name of the field to search
 * @param useNearQuery if true, search for the characters of {@code term} in order
 *                     instead of for {@code term} as one indexed term
 * @return one {@code {docId, startOffset, endOffset}} triple per matching span,
 *         where {@code docId} is relative to the top-level reader; an offset stays
 *         {@code -1} when no collected leaf sat on the span's first/last position;
 *         {@code null} when {@code term} is empty
 * @throws IOException if reading the index fails
 */
public List<Integer[]> query_for_offsets(String term, String field, boolean useNearQuery) throws IOException {
    if (term.length() == 0) {
        // Kept as null (rather than an empty list) so callers that test for null keep working.
        return null;
    }

    final SpanQuery sq;
    if (!useNearQuery) {
        sq = new SpanTermQuery(new Term(field, term));
    } else {
        SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true);
        // Step by code point, not by char: substring(i, i + 1) would cut a
        // supplementary character into two unpaired surrogates, neither of which
        // is a usable term on its own.
        for (int i = 0; i < term.length();) {
            int next = i + Character.charCount(term.codePointAt(i));
            builder.addClause(new SpanTermQuery(new Term(field, term.substring(i, next))));
            i = next;
        }
        sq = builder.build();
    }

    List<Integer[]> offs = new ArrayList<>();
    IndexSearcher searcher = new IndexSearcher(idx_reader);

    // The weight does not depend on the leaf, so it is built once instead of once per segment.
    SpanWeight weights = sq.createWeight(searcher, false);
    if (weights == null) {
        return offs;
    }

    for (LeafReaderContext ctx : idx_reader.leaves()) {
        Spans spans = weights.getSpans(ctx, Postings.OFFSETS);
        if (spans == null) {
            System.out.printf("Nothing found for %s%n", term);
            continue;
        }

        int leafDoc;
        while ((leafDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) {
            // Spans obtained for a leaf report leaf-relative ids; add docBase so ids
            // from different segments do not collide in the returned list.
            final int doc_id = ctx.docBase + leafDoc;
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                final int start_pos = spans.startPosition();
                final int end_pos = spans.endPosition();
                final Integer[] off_x = new Integer[] { doc_id, -1, -1 };
                spans.collect(new SpanCollector() {
                    @Override
                    public void collectLeaf(PostingsEnum pe, int i, Term term) throws IOException {
                        int s_off = pe.startOffset();
                        int e_off = pe.endOffset();
                        // The leaf on the span's first position supplies the start offset.
                        if (i == start_pos) {
                            off_x[1] = s_off;
                        }
                        // The leaf on the span's last position supplies the end offset.
                        if (i + 1 == end_pos) {
                            off_x[2] = e_off;
                        }
                    }

                    @Override
                    public void reset() {
                        // A fresh collector is created for every span position, so
                        // there is no state to clear.
                    }
                });
                offs.add(off_x);
            }
        }
    }

    return offs;
}

From source file:uk.co.flax.luwak.util.SpanExtractor.java

License:Apache License

/**
 * Collect all Spans extracted from a Scorer using a SpanCollector
 * @param scorer the scorer to extract Spans from
 * @param collector the SpanCollector/*from  ww w.ja v  a  2  s. c om*/
 * @param errorOnNoSpans if true, throw an error if no Spans can be extracted
 *                       from the Scorer or any of its children
 * @throws IOException on error
 */
public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException {

    List<Spans> allSpans = getSpans(scorer, errorOnNoSpans);
    int doc = scorer.docID();

    for (Spans spans : allSpans) {
        int spanDoc = spans.docID();
        // if the Scorer advances lazily, then not all of its subspans may be on
        // the correct document
        if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                spans.collect(collector);
            }
        }
    }

}

From source file:uk.co.flax.luwak.util.XNearSpansOrdered.java

License:Apache License

/**
 * Hands the collector to each sub-span in turn, in the iteration order of
 * {@code subSpans}.
 *
 * @param collector the collector passed on to every sub-span
 * @throws IOException if a sub-span fails while collecting
 */
@Override
public void collect(SpanCollector collector) throws IOException {
    for (Spans child : subSpans) {
        child.collect(collector);
    }
}