List of usage examples for org.apache.lucene.search.spans.SpanWeight#getSpans
/**
 * Signature of the method whose usages are listed below.
 *
 * <p>Callers pass one {@link LeafReaderContext} (a single index segment) and the
 * {@code Postings} level they need, and receive a {@link Spans} to iterate over.
 * NOTE(review): most of the examples on this page null-check the result before
 * iterating, so a {@code null} return presumably means "no matches in this
 * segment" - confirm against the Lucene Javadoc.
 *
 * @param ctx the leaf (segment) context to obtain spans for
 * @param requiredPostings the postings detail the caller requires
 * @return the spans for {@code ctx}; callers on this page treat {@code null} as "nothing to iterate"
 * @throws IOException declared by the signature
 */
public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
/**
 * Counts every span position that {@code q} matches in the test index.
 *
 * <p>The index is asserted to consist of exactly one leaf; the query is converted to a
 * {@link SpanQuery} for {@code field}, rewritten against the reader and then walked
 * document by document, position by position.
 *
 * @param field field used when converting {@code q} into a span query
 * @param q the query to count span matches for
 * @return total number of span positions, or 0 when the weight yields no spans
 * @throws Exception propagated from conversion, rewriting or span iteration
 */
long countSpans(String field, Query q) throws Exception {
    List<LeafReaderContext> leaves = reader.leaves();
    assert (leaves.size() == 1);
    LeafReaderContext onlyLeaf = leaves.get(0);

    SpanQuery rewritten = (SpanQuery) convert(field, q).rewrite(reader);
    SpanWeight weight = rewritten.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    final Spans matches = weight.getSpans(onlyLeaf, SpanWeight.Postings.POSITIONS);
    if (matches == null) {
        // No spans for this leaf: nothing to count.
        return 0;
    }

    long total = 0;
    for (int doc = matches.nextDoc(); doc != Spans.NO_MORE_DOCS; doc = matches.nextDoc()) {
        for (int pos = matches.nextStartPosition();
                pos != Spans.NO_MORE_POSITIONS;
                pos = matches.nextStartPosition()) {
            total++;
        }
    }
    return total;
}
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
long countDocs(String field, Query q) throws Exception { BitSet docs = new BitSet(); List<LeafReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); LeafReaderContext leafReaderContext = ctxs.get(0); SpanQuery sq = convert(field, q);/*from ww w. j av a 2 s .c o m*/ sq = (SpanQuery) sq.rewrite(reader); SpanWeight sw = sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); final Spans spans = sw.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { docs.set(spans.docID()); } } } long spanDocHits = docs.cardinality(); // double check with a regular searcher and original query TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(q, coll); assertEquals(coll.getTotalHits(), spanDocHits); return spanDocHits; }
From source file:org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java
License:Apache License
private void testOffsetForSingleSpanMatch(SpanOnlyParser p, String s, int trueDocID, int trueSpanStart, int trueSpanEnd) throws Exception { SpanQuery sq = (SpanQuery) p.parse(s); List<LeafReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); LeafReaderContext ctx = ctxs.get(0); sq = (SpanQuery) sq.rewrite(ctx.reader()); SpanWeight sw = sq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); final Spans spans = sw.getSpans(ctx, SpanWeight.Postings.POSITIONS); int i = 0;// ww w .j av a2 s .c om int spanStart = -1; int spanEnd = -1; int docID = -1; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { spanStart = spans.startPosition(); spanEnd = spans.endPosition(); docID = spans.docID(); i++; } } assertEquals("should only be one matching span", 1, i); assertEquals("doc id", trueDocID, docID); assertEquals("span start", trueSpanStart, spanStart); assertEquals("span end", trueSpanEnd, spanEnd); }
From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License:Apache License
public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { query = (SpanQuery) query.rewrite(searcher.getIndexReader()); SpanWeight w = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); if (filter == null) { for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; }//from ww w. j a v a2 s .c om boolean cont = visitLeafReader(ctx, spans, visitor); if (!cont) { break; } } } else { filter = searcher.rewrite(filter); Weight searcherWeight = searcher.createWeight(filter, ScoreMode.COMPLETE_NO_SCORES, 1.0f); for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Scorer leafReaderContextScorer = searcherWeight.scorer(ctx); if (leafReaderContextScorer == null) { continue; } //Can we tell from the scorer that there were no hits? //in <= 5.x we could stop here if the filter query had no hits. Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; } DocIdSetIterator filterItr = leafReaderContextScorer.iterator(); if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) { continue; } boolean cont = visitLeafReader(ctx, spans, filterItr, visitor); if (!cont) { break; } } } }
From source file:org.voyanttools.trombone.lucene.CorpusMapper.java
License:Open Source License
/**
 * Get a Spans that filters for the specified BitSet.
 *
 * @param spanQuery the span query to create a weight and spans for
 * @param bitSet handed to {@code DocumentFilterSpans} together with the unfiltered spans
 * @return a {@code DocumentFilterSpans} wrapping the leaf reader's spans, or {@code null}
 *         when the weight returns no spans
 * @throws IOException propagated from weight creation or span lookup
 */
public Spans getFilteredSpans(SpanQuery spanQuery, BitSet bitSet) throws IOException {
    Spans unfiltered = spanQuery.createWeight(getSearcher(), false)
            .getSpans(getLeafReader().getContext(), SpanWeight.Postings.POSITIONS);
    if (unfiltered == null) {
        return null;
    }
    return new DocumentFilterSpans(unfiltered, bitSet);
}
From source file:org.voyanttools.trombone.lucene.search.SpanQueryParserTest.java
License:Open Source License
@Test public void test() throws IOException { // File storageDirectory = TestHelper.getTemporaryTestStorageDirectory(); // Storage storage = new FileStorage(storageDirectory); Storage storage = new MemoryStorage(); Document document;/*from w w w . ja v a2 s .co m*/ LuceneManager luceneManager = storage.getLuceneManager(); Bits bits = new Bits.MatchAllBits(2); Map<Term, TermContext> termsMap = new HashMap<Term, TermContext>(); document = new Document(); document.add(new TextField("lexical", "It was a dark and stormy night.", Field.Store.YES)); luceneManager.addDocument(document); document = new Document(); document.add( new TextField("lexical", "It was the best of times it was the worst of times.", Field.Store.YES)); luceneManager.addDocument(document); LeafReader atomicReader = SlowCompositeReaderWrapper.wrap(storage.getLuceneManager().getDirectoryReader()); IndexSearcher indexSearcher = new IndexSearcher(atomicReader); SpanQueryParser spanQueryParser = new SpanQueryParser(atomicReader, storage.getLuceneManager().getAnalyzer()); Map<String, SpanQuery> queriesMap; SpanQuery query; SpanWeight weight; Spans spans; // single term queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); spans.nextDoc(); assertEquals(0, spans.docID()); spans.nextStartPosition(); assertEquals(3, spans.startPosition()); assertEquals(spans.nextStartPosition(), Spans.NO_MORE_POSITIONS); assertEquals(spans.nextDoc(), Spans.NO_MORE_DOCS); // single term with case (this gets converted to lower case) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "It" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("It"); weight = query.createWeight(indexSearcher, false); spans = 
weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(6, spans.nextStartPosition()); // single term (ignore quotes) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark\"" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark", "best" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark;best" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = 
weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate terms (not collapsed), with spaces queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { " dark ; best " }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dark"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // comma-separated terms (collapased) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark,best" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark,best"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); 
assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*,b*t" }, TokenType.lexical, true); // dark and best assertEquals(1, queriesMap.size()); query = queriesMap.get("dar*,b*t"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // two separate wildcards (not collapsed) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar*;bes*" }, TokenType.lexical, true); assertEquals(2, queriesMap.size()); query = queriesMap.get("dar*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); query = queriesMap.get("bes*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(1, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark and" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = 
queriesMap.get("dark and"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(5, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "it was" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("it was"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(1, spans.nextDoc()); assertEquals(0, spans.nextStartPosition()); assertEquals(6, spans.nextStartPosition()); // phrase with wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dar* an*" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dar* an*"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(5, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase with wildcards queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "dark stormy~2" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark stormy~2"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(6, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); 
assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); // phrase with wildcards (ignored quotes) queriesMap = spanQueryParser.getSpanQueriesMap(new String[] { "\"dark stormy\"~2" }, TokenType.lexical, true); assertEquals(1, queriesMap.size()); query = queriesMap.get("dark stormy~2"); weight = query.createWeight(indexSearcher, false); spans = weight.getSpans(atomicReader.getContext(), SpanWeight.Postings.POSITIONS); assertEquals(0, spans.nextDoc()); assertEquals(3, spans.nextStartPosition()); assertEquals(6, spans.endPosition()); assertEquals(spans.NO_MORE_POSITIONS, spans.nextStartPosition()); assertEquals(spans.NO_MORE_DOCS, spans.nextDoc()); storage.destroy(); }
From source file:tw.com.kyle.luminance.corpus.compute.CollocateFromIndex.java
private FreqInfo query_ngram(String ngram) throws IOException { SpanQuery sq = null;/*from w ww. java 2s .c om*/ final String FIELD = "content"; SpanNearQuery.Builder builder = new SpanNearQuery.Builder(FIELD, true); if (ngram.length() > 1) { for (int i = 0; i < ngram.length(); ++i) { builder.addClause(new SpanTermQuery(new Term(FIELD, ngram.substring(i, i + 1)))); } sq = builder.build(); } else { sq = new SpanTermQuery(new Term(FIELD, ngram)); } int all_frag_freq = 0; int all_tok_freq = 0; int doc_freq = 0; for (LeafReaderContext ctx : reader.leaves()) { SpanWeight weights = sq.createWeight(searcher, false); if (weights == null) { continue; } Spans spans = weights.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { // System.out.printf("Nothing found for %s%n", ngram); continue; } int nxtDoc = 0; while ((nxtDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { all_frag_freq += 1; if (doc_set.contains(nxtDoc)) { doc_freq += 1; } while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { all_tok_freq += 1; } } } // System.out.printf("Occurrence frag: %d, doc: %d(tok: %d)%n", all_frag_freq, discourse_freq, tok_freq); FreqInfo freq_info = new FreqInfo(); freq_info.nDocuments = doc_freq; freq_info.nFragments = all_frag_freq; freq_info.nTokens = all_tok_freq; return freq_info; }
From source file:tw.com.kyle.luminance.LumQuery.java
public List<Integer[]> query_for_offsets(String term, String field, boolean useNearQuery) throws IOException { if (term.length() == 0) { return null; }// ww w . j ava2s . c om SpanQuery sq = null; if (!useNearQuery) { sq = new SpanTermQuery(new Term(field, term)); } else { SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true); for (int i = 0; i < term.length(); ++i) { builder.addClause(new SpanTermQuery(new Term(field, term.substring(i, i + 1)))); } sq = builder.build(); } IndexSearcher searcher = new IndexSearcher(idx_reader); List<Integer[]> offs = new ArrayList<>(); for (LeafReaderContext ctx : idx_reader.leaves()) { SpanWeight weights = sq.createWeight(searcher, false); if (weights == null) { continue; } Spans spans = weights.getSpans(ctx, Postings.OFFSETS); if (spans == null) { System.out.printf("Nothing found for %s%n", term); continue; } int nxtDoc = -1; while ((nxtDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { final int doc_id = nxtDoc; while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { final int start_pos = spans.startPosition(); final int end_pos = spans.endPosition(); Integer[] off_x = new Integer[] { doc_id, -1, -1 }; spans.collect(new SpanCollector() { @Override public void collectLeaf(PostingsEnum pe, int i, Term term) throws IOException { int s_off = pe.startOffset(); int e_off = pe.endOffset(); if (i == start_pos) off_x[1] = s_off; if (i + 1 == end_pos) off_x[2] = e_off; } @Override public void reset() { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } }); offs.add(off_x); } } } return offs; }