Example usage for org.apache.lucene.search.spans SpanQuery rewrite

List of usage examples for org.apache.lucene.search.spans SpanQuery rewrite

Introduction

On this page you can find an example usage for org.apache.lucene.search.spans SpanQuery rewrite.

Prototype

public Query rewrite(IndexReader reader) throws IOException 

Source Link

Document

Expert: called to re-write queries into primitive queries.

Usage

From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java

License:Apache License

/**
 * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>SpanQuery</code>.
 * /*  w  ww .j a  v  a  2 s .com*/
 * @param terms
 *          Map to place created WeightedSpanTerms in
 * @param spanQuery
 *          SpanQuery to extract Terms from
 * @throws IOException
 */
private void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
        throws IOException {
    Set<String> fieldNames;

    if (fieldName == null) {
        fieldNames = new HashSet<String>();
        collectSpanQueryFields(spanQuery, fieldNames);
    } else {
        fieldNames = new HashSet<String>(1);
        fieldNames.add(fieldName);
    }
    // To support the use of the default field name
    if (defaultField != null) {
        fieldNames.add(defaultField);
    }

    Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>();

    Set<Term> nonWeightedTerms = new HashSet<Term>();
    final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
    if (mustRewriteQuery) {
        for (final String field : fieldNames) {
            final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field));
            queries.put(field, rewrittenQuery);
            rewrittenQuery.extractTerms(nonWeightedTerms);
        }
    } else {
        spanQuery.extractTerms(nonWeightedTerms);
    }

    List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();

    for (final String field : fieldNames) {

        IndexReader reader = getReaderForField(field);
        final Spans spans;
        if (mustRewriteQuery) {
            spans = queries.get(field).getSpans(reader);
        } else {
            spans = spanQuery.getSpans(reader);
        }

        // collect span positions
        while (spans.next()) {
            spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
        }

    }

    if (spanPositions.size() == 0) {
        // no spans found
        return;
    }

    for (final Term queryTerm : nonWeightedTerms) {

        if (fieldNameComparator(queryTerm.field())) {
            WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());

            if (weightedSpanTerm == null) {
                weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
                weightedSpanTerm.addPositionSpans(spanPositions);
                weightedSpanTerm.positionSensitive = true;
                terms.put(queryTerm.text(), weightedSpanTerm);
            } else {
                if (spanPositions.size() > 0) {
                    weightedSpanTerm.addPositionSpans(spanPositions);
                }
            }
        }
    }
}

From source file:nl.inl.blacklab.search.Hits.java

License:Apache License

/**
 * Construct an empty Hits object.
 *
 * @param searcher
 *            the searcher object (may be null; the index reader is then unavailable)
 * @param concordanceFieldPropName
 *            field to use by default when finding concordances
 * @param sourceQuery
 *            the query to execute to get the hits
 * @throws TooManyClauses if the query is overly broad (expands to too many terms)
 */
public Hits(Searcher searcher, String concordanceFieldPropName, SpanQuery sourceQuery) throws TooManyClauses {
    this(searcher, concordanceFieldPropName);

    try {
        DirectoryReader reader = searcher == null ? null : searcher.getIndexReader();
        spanQuery = (SpanQuery) sourceQuery.rewrite(reader);
        termContexts = new HashMap<Term, TermContext>();
        Set<Term> terms = new HashSet<Term>();
        spanQuery.extractTerms(terms);
        etiquette = new ThreadPriority();
        // BUGFIX: reader may be null (searcher == null, handled below for
        // leaves()), but the original dereferenced reader.getContext()
        // unconditionally inside this loop, causing an NPE when the query
        // contained any terms. Guard the whole loop consistently.
        if (reader != null) {
            for (Term term : terms) {
                try {
                    etiquette.behave();
                } catch (InterruptedException e) {
                    // Taking too long, break it off.
                    // Not a very graceful way to do it... but at least it won't
                    // be stuck forever.
                    Thread.currentThread().interrupt(); // client can check this
                    throw new RuntimeException("Query matches too many terms; aborted.");
                }
                termContexts.put(term, TermContext.build(reader.getContext(), term, true));
            }
        }

        currentSourceSpans = null;
        atomicReaderContexts = reader == null ? null : reader.leaves();
        atomicReaderContextIndex = -1;
        hitQueryContext = new HitQueryContext(); // to keep track of captured groups, etc.
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    sourceSpansFullyRead = false;
}

From source file:nl.inl.blacklab.search.HitsImpl.java

License:Apache License

/**
 * Construct a Hits object from a SpanQuery.
 *
 * @param searcher
 *            the searcher object
 * @param sourceQuery
 *            the query to execute to get the hits
 * @throws TooManyClauses if the query is overly broad (expands to too many terms)
 */
HitsImpl(Searcher searcher, SpanQuery sourceQuery) throws TooManyClauses {
    this(searcher, (List<Hit>) null);
    try {
        IndexReader indexReader = searcher.getIndexReader();
        spanQuery = (SpanQuery) sourceQuery.rewrite(indexReader);

        // Build a TermContext for every term in the rewritten query, while
        // yielding to higher-priority work between terms.
        termContexts = new HashMap<>();
        Set<Term> queryTerms = new HashSet<>();
        extractTermsFromSpanQuery(queryTerms);
        etiquette = new ThreadPriority();
        for (Term queryTerm : queryTerms) {
            try {
                etiquette.behave();
            } catch (InterruptedException e) {
                // Taking too long: restore the interrupt flag so the client
                // can detect it, then abort the query. Not graceful, but it
                // prevents being stuck forever.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Query matches too many terms; aborted.");
            }
            termContexts.put(queryTerm, TermContext.build(indexReader.getContext(), queryTerm));
        }

        // Spans iteration state starts out unset; it is initialized lazily.
        currentSourceSpans = null;
        atomicReaderContexts = indexReader == null ? null : indexReader.leaves();
        atomicReaderContextIndex = -1;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    sourceSpansFullyRead = false;
}

From source file:nl.inl.blacklab.search.lucene.BLSpanOrQuery.java

License:Apache License

@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Copy-on-write: only clone this query once at least one clause
    // rewrites to a different object; otherwise return this unchanged.
    BLSpanOrQuery rewritten = null;
    for (int idx = 0; idx < clauses.size(); idx++) {
        SpanQuery originalClause = clauses.get(idx);
        SpanQuery rewrittenClause = (SpanQuery) originalClause.rewrite(reader);
        if (rewrittenClause == originalClause) {
            continue; // clause unchanged
        }
        if (rewritten == null) {
            rewritten = this.clone();
        }
        rewritten.clauses.set(idx, rewrittenClause);
    }
    return rewritten == null ? this : rewritten;
}

From source file:nl.inl.blacklab.search.lucene.SpanQueryAndNot.java

License:Apache License

@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Lazily clone: the result stays "this" unless a clause actually
    // rewrites to a different query object.
    SpanQueryAndNot rewritten = null;
    for (int idx = 0; idx < clauses.length; idx++) {
        SpanQuery originalClause = clauses[idx];
        SpanQuery rewrittenClause = (SpanQuery) originalClause.rewrite(reader);
        if (rewrittenClause == originalClause) {
            continue; // clause unchanged
        }
        if (rewritten == null) {
            rewritten = (SpanQueryAndNot) clone();
        }
        rewritten.clauses[idx] = rewrittenClause;
    }
    return rewritten == null ? this : rewritten;
}

From source file:nl.inl.blacklab.search.lucene.SpanQueryBase.java

License:Apache License

@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Copy-on-write rewrite over the clause array. Null clauses are
    // permitted and trivially "rewrite" to null.
    SpanQueryBase rewritten = null;
    for (int idx = 0; idx < clauses.length; idx++) {
        SpanQuery originalClause = clauses[idx];
        SpanQuery rewrittenClause = originalClause == null ? null : (SpanQuery) originalClause.rewrite(reader);
        if (rewrittenClause == originalClause) {
            continue; // clause unchanged
        }
        if (rewritten == null) {
            rewritten = (SpanQueryBase) clone();
        }
        rewritten.clauses[idx] = rewrittenClause;
    }
    return rewritten == null ? this : rewritten;
}

From source file:nl.inl.blacklab.search.lucene.SpanQueryDocLevelAndNot.java

License:Apache License

@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Only allocate a clone when some clause rewrites to a new object;
    // otherwise this query is returned unmodified.
    SpanQueryDocLevelAndNot rewritten = null;
    for (int idx = 0; idx < clauses.length; idx++) {
        SpanQuery originalClause = clauses[idx];
        SpanQuery rewrittenClause = (SpanQuery) originalClause.rewrite(reader);
        if (rewrittenClause == originalClause) {
            continue; // clause unchanged
        }
        if (rewritten == null) {
            rewritten = (SpanQueryDocLevelAndNot) clone();
        }
        rewritten.clauses[idx] = rewrittenClause;
    }
    return rewritten == null ? this : rewritten;
}

From source file:org.apache.solr.analysis.entity.TestEntityFilterFactory.java

License:Apache License

/** Counts every span match of the query; the test index must be a single segment. */
private long countSpans(SpanQuery q) throws Exception {
    List<AtomicReaderContext> leafContexts = reader.leaves();
    assert (leafContexts.size() == 1);
    AtomicReaderContext leaf = leafContexts.get(0);

    SpanQuery rewritten = (SpanQuery) q.rewrite(leaf.reader());
    Spans matches = rewritten.getSpans(leaf, null, new HashMap<Term, TermContext>());

    long total = 0;
    while (matches.next()) {
        total++;
    }
    return total;
}

From source file:org.apache.solr.analysis.entity.TestEntityFilterFactory.java

License:Apache License

/** Counts distinct documents with at least one span match, cross-checked against a regular search. */
private long countDocs(SpanQuery q) throws Exception {
    List<AtomicReaderContext> leafContexts = reader.leaves();
    assert (leafContexts.size() == 1);
    AtomicReaderContext leaf = leafContexts.get(0);
    IndexReaderContext topLevelContext = reader.getContext();
    SpanQuery rewritten = (SpanQuery) q.rewrite(leaf.reader());

    // Build a TermContext for every term in the rewritten query.
    Set<Term> queryTerms = new HashSet<Term>();
    rewritten.extractTerms(queryTerms);
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    for (Term queryTerm : queryTerms) {
        termContexts.put(queryTerm, TermContext.build(topLevelContext, queryTerm));
    }

    // Record each document that produced at least one span.
    OpenBitSet matchedDocs = new OpenBitSet();
    Spans spans = rewritten.getSpans(leaf, null, termContexts);
    while (spans.next()) {
        matchedDocs.set(spans.doc());
    }
    long spanDocHits = matchedDocs.cardinality();

    // double check with a regular searcher
    TotalHitCountCollector coll = new TotalHitCountCollector();
    searcher.search(rewritten, coll);
    assertEquals(coll.getTotalHits(), spanDocHits);
    return spanDocHits;
}

From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java

License:Apache License

/** Converts the query to a SpanQuery and counts its span matches; assumes a one-segment index. */
long countSpans(String field, Query q) throws Exception {
    List<LeafReaderContext> leafContexts = reader.leaves();
    assert (leafContexts.size() == 1);
    LeafReaderContext leaf = leafContexts.get(0);

    SpanQuery spanQuery = convert(field, q);
    spanQuery = (SpanQuery) spanQuery.rewrite(reader);
    SpanWeight weight = spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);

    // A null Spans means the query matches nothing in this segment.
    long total = 0;
    if (spans != null) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                total++;
            }
        }
    }
    return total;
}