List of usage examples for the collect method of org.apache.lucene.search.spans.Spans
/**
 * Signature of the {@code collect} method whose usages are listed below: it takes a
 * {@link SpanCollector} and may throw {@link IOException}.
 *
 * NOTE(review): the first two examples below only call it after {@code nextStartPosition()}
 * has returned something other than {@code NO_MORE_POSITIONS} - presumably a precondition;
 * confirm against the Lucene Javadoc.
 */
public abstract void collect(SpanCollector collector) throws IOException;
From source file:tw.com.kyle.luminance.LumQuery.java
public List<Integer[]> query_for_offsets(String term, String field, boolean useNearQuery) throws IOException { if (term.length() == 0) { return null; }/*w w w .j a v a2 s .c o m*/ SpanQuery sq = null; if (!useNearQuery) { sq = new SpanTermQuery(new Term(field, term)); } else { SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true); for (int i = 0; i < term.length(); ++i) { builder.addClause(new SpanTermQuery(new Term(field, term.substring(i, i + 1)))); } sq = builder.build(); } IndexSearcher searcher = new IndexSearcher(idx_reader); List<Integer[]> offs = new ArrayList<>(); for (LeafReaderContext ctx : idx_reader.leaves()) { SpanWeight weights = sq.createWeight(searcher, false); if (weights == null) { continue; } Spans spans = weights.getSpans(ctx, Postings.OFFSETS); if (spans == null) { System.out.printf("Nothing found for %s%n", term); continue; } int nxtDoc = -1; while ((nxtDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { final int doc_id = nxtDoc; while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { final int start_pos = spans.startPosition(); final int end_pos = spans.endPosition(); Integer[] off_x = new Integer[] { doc_id, -1, -1 }; spans.collect(new SpanCollector() { @Override public void collectLeaf(PostingsEnum pe, int i, Term term) throws IOException { int s_off = pe.startOffset(); int e_off = pe.endOffset(); if (i == start_pos) off_x[1] = s_off; if (i + 1 == end_pos) off_x[2] = e_off; } @Override public void reset() { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } }); offs.add(off_x); } } } return offs; }
From source file:uk.co.flax.luwak.util.SpanExtractor.java
License:Apache License
/** * Collect all Spans extracted from a Scorer using a SpanCollector * @param scorer the scorer to extract Spans from * @param collector the SpanCollector/*from ww w.ja v a 2 s. c om*/ * @param errorOnNoSpans if true, throw an error if no Spans can be extracted * from the Scorer or any of its children * @throws IOException on error */ public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException { List<Spans> allSpans = getSpans(scorer, errorOnNoSpans); int doc = scorer.docID(); for (Spans spans : allSpans) { int spanDoc = spans.docID(); // if the Scorer advances lazily, then not all of its subspans may be on // the correct document if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { spans.collect(collector); } } } }
From source file:uk.co.flax.luwak.util.XNearSpansOrdered.java
License:Apache License
@Override public void collect(SpanCollector collector) throws IOException { for (Spans spans : subSpans) { spans.collect(collector); }//from w w w . j a va 2 s .c om }