Usage examples for org.apache.lucene.search.spans.SpanQuery#rewrite, collected from open-source projects.
public Query rewrite(IndexReader reader) throws IOException
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
/** * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>SpanQuery</code>. * /* w ww .j a v a 2 s .com*/ * @param terms * Map to place created WeightedSpanTerms in * @param spanQuery * SpanQuery to extract Terms from * @throws IOException */ private void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException { Set<String> fieldNames; if (fieldName == null) { fieldNames = new HashSet<String>(); collectSpanQueryFields(spanQuery, fieldNames); } else { fieldNames = new HashSet<String>(1); fieldNames.add(fieldName); } // To support the use of the default field name if (defaultField != null) { fieldNames.add(defaultField); } Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>(); Set<Term> nonWeightedTerms = new HashSet<Term>(); final boolean mustRewriteQuery = mustRewriteQuery(spanQuery); if (mustRewriteQuery) { for (final String field : fieldNames) { final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field)); queries.put(field, rewrittenQuery); rewrittenQuery.extractTerms(nonWeightedTerms); } } else { spanQuery.extractTerms(nonWeightedTerms); } List<PositionSpan> spanPositions = new ArrayList<PositionSpan>(); for (final String field : fieldNames) { IndexReader reader = getReaderForField(field); final Spans spans; if (mustRewriteQuery) { spans = queries.get(field).getSpans(reader); } else { spans = spanQuery.getSpans(reader); } // collect span positions while (spans.next()) { spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1)); } } if (spanPositions.size() == 0) { // no spans found return; } for (final Term queryTerm : nonWeightedTerms) { if (fieldNameComparator(queryTerm.field())) { WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text()); if (weightedSpanTerm == null) { weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text()); weightedSpanTerm.addPositionSpans(spanPositions); 
weightedSpanTerm.positionSensitive = true; terms.put(queryTerm.text(), weightedSpanTerm); } else { if (spanPositions.size() > 0) { weightedSpanTerm.addPositionSpans(spanPositions); } } } } }
From source file:nl.inl.blacklab.search.Hits.java
License:Apache License
/**
 * Construct an empty Hits object.
 *
 * @param searcher the searcher object
 * @param concordanceFieldPropName field to use by default when finding concordances
 * @param sourceQuery the query to execute to get the hits
 * @throws TooManyClauses if the query is overly broad (expands to too many terms)
 */
public Hits(Searcher searcher, String concordanceFieldPropName, SpanQuery sourceQuery) throws TooManyClauses {
    this(searcher, concordanceFieldPropName);
    try {
        DirectoryReader reader = searcher == null ? null : searcher.getIndexReader();
        // Rewrite multi-term constructs to concrete span queries, then gather the terms
        // so per-term contexts can be prebuilt for span enumeration.
        spanQuery = (SpanQuery) sourceQuery.rewrite(reader);
        termContexts = new HashMap<Term, TermContext>();
        Set<Term> terms = new HashSet<Term>();
        spanQuery.extractTerms(terms);
        etiquette = new ThreadPriority();
        for (Term term : terms) {
            try {
                etiquette.behave();
            } catch (InterruptedException e) {
                // Taking too long, break it off.
                // Not a very graceful way to do it... but at least it won't
                // be stuck forever.
                Thread.currentThread().interrupt(); // client can check this
                throw new RuntimeException("Query matches too many terms; aborted.");
            }
            // NOTE(review): reader is dereferenced here without a null check; if searcher
            // (and thus reader) is null and the rewritten query yields any terms, this
            // throws NPE — confirm callers never pass a null searcher with a term query.
            termContexts.put(term, TermContext.build(reader.getContext(), term, true));
        }
        currentSourceSpans = null;
        atomicReaderContexts = reader == null ? null : reader.leaves();
        atomicReaderContextIndex = -1; // spans are fetched lazily, leaf by leaf
        hitQueryContext = new HitQueryContext(); // to keep track of captured groups, etc.
        //sourceSpans = BLSpansWrapper.optWrap(spanQuery.getSpans(srw != null ? srw.getContext() : null, srw != null ? srw.getLiveDocs() : null, termContexts));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    sourceSpansFullyRead = false;
}
From source file:nl.inl.blacklab.search.HitsImpl.java
License:Apache License
/**
 * Construct a Hits object from a SpanQuery.
 *
 * @param searcher the searcher object
 * @param sourceQuery the query to execute to get the hits
 * @throws TooManyClauses if the query is overly broad (expands to too many terms)
 */
HitsImpl(Searcher searcher, SpanQuery sourceQuery) throws TooManyClauses {
    this(searcher, (List<Hit>) null);
    try {
        IndexReader reader = searcher.getIndexReader();
        // Rewrite multi-term constructs to concrete span queries, then gather the
        // terms so per-term contexts can be prebuilt for span enumeration.
        spanQuery = (SpanQuery) sourceQuery.rewrite(reader);
        termContexts = new HashMap<>();
        Set<Term> terms = new HashSet<>();
        extractTermsFromSpanQuery(terms);
        etiquette = new ThreadPriority();
        for (Term term : terms) {
            try {
                etiquette.behave();
            } catch (InterruptedException e) {
                // Taking too long, break it off.
                // Not a very graceful way to do it... but at least it won't
                // be stuck forever.
                Thread.currentThread().interrupt(); // client can check this
                throw new RuntimeException("Query matches too many terms; aborted.");
            }
            termContexts.put(term, TermContext.build(reader.getContext(), term));
        }
        currentSourceSpans = null;
        // NOTE(review): this null check is unreachable as written — searcher was already
        // dereferenced above via searcher.getIndexReader(); kept for symmetry/defensiveness.
        atomicReaderContexts = reader == null ? null : reader.leaves();
        atomicReaderContextIndex = -1; // spans are fetched lazily, leaf by leaf
        //sourceSpans = BLSpansWrapper.optWrap(spanQuery.getSpans(srw != null ? srw.getContext() : null, srw != null ? srw.getLiveDocs() : null, termContexts));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    sourceSpansFullyRead = false;
}
From source file:nl.inl.blacklab.search.lucene.BLSpanOrQuery.java
License:Apache License
@Override public Query rewrite(IndexReader reader) throws IOException { BLSpanOrQuery clone = null;/* w w w . ja v a 2 s . com*/ for (int i = 0; i < clauses.size(); i++) { SpanQuery c = clauses.get(i); SpanQuery query = (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewrote: must clone if (clone == null) clone = this.clone(); clone.clauses.set(i, query); } } if (clone != null) { return clone; // some clauses rewrote } return this; // no clauses rewrote }
From source file:nl.inl.blacklab.search.lucene.SpanQueryAndNot.java
License:Apache License
@Override public Query rewrite(IndexReader reader) throws IOException { SpanQueryAndNot clone = null;// w w w . ja v a 2 s. c o m for (int i = 0; i < clauses.length; i++) { SpanQuery c = clauses[i]; SpanQuery query = (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewrote: must clone if (clone == null) clone = (SpanQueryAndNot) clone(); clone.clauses[i] = query; } } if (clone != null) { return clone; // some clauses rewrote } return this; // no clauses rewrote }
From source file:nl.inl.blacklab.search.lucene.SpanQueryBase.java
License:Apache License
@Override public Query rewrite(IndexReader reader) throws IOException { SpanQueryBase clone = null;// ww w. j ava 2 s. co m for (int i = 0; i < clauses.length; i++) { SpanQuery c = clauses[i]; SpanQuery query = c == null ? null : (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewritten: must clone if (clone == null) clone = (SpanQueryBase) clone(); clone.clauses[i] = query; } } if (clone != null) { return clone; // some clauses rewritten } return this; // no clauses rewritten }
From source file:nl.inl.blacklab.search.lucene.SpanQueryDocLevelAndNot.java
License:Apache License
@Override public Query rewrite(IndexReader reader) throws IOException { SpanQueryDocLevelAndNot clone = null; for (int i = 0; i < clauses.length; i++) { SpanQuery c = clauses[i]; SpanQuery query = (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewrote: must clone if (clone == null) clone = (SpanQueryDocLevelAndNot) clone(); clone.clauses[i] = query;//from w ww .ja v a 2 s . c o m } } if (clone != null) { return clone; // some clauses rewrote } return this; // no clauses rewrote }
From source file:org.apache.solr.analysis.entity.TestEntityFilterFactory.java
License:Apache License
private long countSpans(SpanQuery q) throws Exception { List<AtomicReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); AtomicReaderContext ctx = ctxs.get(0); q = (SpanQuery) q.rewrite(ctx.reader()); Spans spans = q.getSpans(ctx, null, new HashMap<Term, TermContext>()); long i = 0;//from w ww . j a v a2s. com while (spans.next()) { i++; } return i; }
From source file:org.apache.solr.analysis.entity.TestEntityFilterFactory.java
License:Apache License
private long countDocs(SpanQuery q) throws Exception { OpenBitSet docs = new OpenBitSet(); List<AtomicReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); AtomicReaderContext ctx = ctxs.get(0); IndexReaderContext parentCtx = reader.getContext(); q = (SpanQuery) q.rewrite(ctx.reader()); Set<Term> qTerms = new HashSet<Term>(); q.extractTerms(qTerms);//w w w . j a v a2 s. c om Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>(); for (Term t : qTerms) { TermContext c = TermContext.build(parentCtx, t); termContexts.put(t, c); } Spans spans = q.getSpans(ctx, null, termContexts); while (spans.next()) { docs.set(spans.doc()); } long spanDocHits = docs.cardinality(); // double check with a regular searcher TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(q, coll); assertEquals(coll.getTotalHits(), spanDocHits); return spanDocHits; }
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
/** Converts {@code q} to a span query on {@code field} and counts all its span matches. */
long countSpans(String field, Query q) throws Exception {
    List<LeafReaderContext> leaves = reader.leaves();
    assert (leaves.size() == 1);
    LeafReaderContext leaf = leaves.get(0);
    SpanQuery spanQuery = convert(field, q);
    spanQuery = (SpanQuery) spanQuery.rewrite(reader);
    SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    final Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);
    long total = 0;
    if (spans == null) {
        return total; // query matches nothing in this segment
    }
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            total++;
        }
    }
    return total;
}