org.apache.lucene.search.spans.SpanWeight.java Source code

Introduction

Here is the source code for org.apache.lucene.search.spans.SpanWeight.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.spans;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;

/**
 * Expert-only.  Public for use by other weight implementations.
 *
 * <p>Base {@link Weight} for span queries.  It builds a {@link Similarity.SimScorer}
 * from the term statistics handed to the constructor (or leaves it {@code null} when
 * no statistics are available) and provides scoring, explanation and match
 * iteration on top of the {@link Spans} supplied by subclasses through
 * {@link #getSpans(LeafReaderContext, Postings)}.
 */
public abstract class SpanWeight extends Weight {

    /**
     * Enumeration defining what postings information should be retrieved from the
     * index for a given Spans
     */
    public enum Postings {
        POSITIONS {
            @Override
            public int getRequiredPostings() {
                return PostingsEnum.POSITIONS;
            }
        },
        PAYLOADS {
            @Override
            public int getRequiredPostings() {
                return PostingsEnum.PAYLOADS;
            }
        },
        OFFSETS {
            @Override
            public int getRequiredPostings() {
                // Requests both the payload and the offset flags.
                return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS;
            }
        };

        /**
         * @return the {@link PostingsEnum} flags to request for this level
         */
        public abstract int getRequiredPostings();

        /**
         * Return whichever of {@code this} and {@code postings} is declared later in
         * this enum.
         * @param postings the level to compare against
         * @return {@code postings} if it is declared after this constant, otherwise {@code this}
         */
        public Postings atLeast(Postings postings) {
            if (postings.compareTo(this) > 0) {
                return postings;
            }
            return this;
        }
    }

    protected final Similarity similarity;
    /** Scorer built from the term statistics; {@code null} when no statistics were available. */
    protected final Similarity.SimScorer simScorer;
    protected final String field;

    /**
     * Create a new SpanWeight
     * @param query the parent query
     * @param searcher the IndexSearcher to query against
     * @param termStates a map of terms to {@link TermStates} for use in building the similarity.  May
     *                     be null if scores are not required
     * @param boost the boost passed on to {@link Similarity#scorer}
     * @throws IOException on error
     */
    public SpanWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost)
            throws IOException {
        super(query);
        this.field = query.getField();
        this.similarity = searcher.getSimilarity();
        this.simScorer = buildSimWeight(query, searcher, termStates, boost);
    }

    /**
     * Build the {@link Similarity.SimScorer} for this weight.
     *
     * @return the scorer, or {@code null} when {@code termStates} is null or empty, the
     *         query has no field, or none of the terms has a document frequency above zero
     */
    private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher,
            Map<Term, TermStates> termStates, float boost) throws IOException {
        if (termStates == null || termStates.size() == 0 || query.getField() == null) {
            return null;
        }
        TermStatistics[] termStats = new TermStatistics[termStates.size()];
        int termUpTo = 0;
        for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
            TermStates ts = entry.getValue();
            // Only terms that occur in at least one document contribute statistics.
            if (ts.docFreq() > 0) {
                termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
            }
        }
        CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
        if (termUpTo > 0) {
            // Trim the array to the entries that were actually filled in.
            return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
        } else {
            return null; // no terms at all exist, we won't use similarity
        }
    }

    /**
     * Collect all TermStates used by this Weight
     * @param contexts a map to add the TermStates to
     */
    public abstract void extractTermStates(Map<Term, TermStates> contexts);

    /**
     * Expert: Return a Spans object iterating over matches from this Weight
     * @param ctx a LeafReaderContext for this Spans
     * @param requiredPostings the postings information the returned Spans must provide
     * @return a Spans; callers in this class treat {@code null} as "no matches in this leaf"
     * @throws IOException on error
     */
    public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;

    /**
     * Create a scorer over the spans of the given leaf.
     * @param context the LeafReaderContext
     * @return a SpanScorer, or {@code null} if {@link #getSpans} returned {@code null}
     * @throws IOException on error
     */
    @Override
    public SpanScorer scorer(LeafReaderContext context) throws IOException {
        final Spans spans = getSpans(context, Postings.POSITIONS);
        if (spans == null) {
            return null;
        }
        final LeafSimScorer docScorer = getSimScorer(context);
        return new SpanScorer(this, spans, docScorer);
    }

    /**
     * Return a LeafSimScorer for this context
     * @param context the LeafReaderContext
     * @return a LeafSimScorer, or {@code null} if this weight has no {@link #simScorer}
     * @throws IOException on error
     */
    public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
        return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
    }

    /**
     * Explain the score of {@code doc} in the given leaf.
     * @param context the LeafReaderContext
     * @param doc the document to explain
     * @return a match explanation if the spans match {@code doc}, otherwise a no-match explanation
     * @throws IOException on error
     */
    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        SpanScorer scorer = scorer(context);
        if (scorer != null) {
            int newDoc = scorer.iterator().advance(doc);
            if (newDoc == doc) {
                if (simScorer == null) {
                    // No similarity was built (scores not required or no term statistics):
                    // report the match without trying to construct a LeafSimScorer from null.
                    return Explanation.match(0f, "match " + getQuery() + " in " + doc + " without score");
                }
                float freq = scorer.sloppyFreq();
                LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
                Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
                Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
                return Explanation.match(scoreExplanation.getValue(), "weight(" + getQuery() + " in " + doc + ") ["
                        + similarity.getClass().getSimpleName() + "], result of:", scoreExplanation);
            }
        }

        return Explanation.noMatch("no matching term");
    }

    /** Mutable holder for one leaf term collected from the current span. */
    private static class TermMatch {
        Term term;
        int position;
        int startOffset;
        int endOffset;
    }

    /**
     * Return the matches of this weight for {@code doc}, one match per span, with the
     * individual leaf terms of each span exposed as sub-matches.
     * @param context the LeafReaderContext
     * @param doc the document to report matches for
     * @return the Matches produced by {@link MatchesUtils#forField} for this weight's field
     * @throws IOException on error
     */
    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
        return MatchesUtils.forField(field, () -> {
            Spans spans = getSpans(context, Postings.OFFSETS);
            if (spans == null || spans.advance(doc) != doc) {
                return null;
            }
            return new MatchesIterator() {

                // Number of valid entries in innerTerms for the current span; 0 means
                // "not collected yet" and triggers lazy collection in the accessors below.
                int innerTermCount = 0;
                // Reused across spans; slots are allocated once and overwritten afterwards.
                TermMatch[] innerTerms = new TermMatch[0];

                SpanCollector termCollector = new SpanCollector() {
                    @Override
                    public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
                        innerTermCount++;
                        if (innerTermCount > innerTerms.length) {
                            // Grow by exactly one slot so that every slot of the array
                            // always holds a TermMatch instance.
                            TermMatch[] temp = new TermMatch[innerTermCount];
                            System.arraycopy(innerTerms, 0, temp, 0, innerTermCount - 1);
                            innerTerms = temp;
                            innerTerms[innerTermCount - 1] = new TermMatch();
                        }
                        TermMatch slot = innerTerms[innerTermCount - 1];
                        slot.term = term;
                        slot.position = position;
                        slot.startOffset = postings.startOffset();
                        slot.endOffset = postings.endOffset();
                    }

                    @Override
                    public void reset() {
                        innerTermCount = 0;
                    }
                };

                @Override
                public boolean next() throws IOException {
                    // Invalidate the terms collected for the previous span.
                    innerTermCount = 0;
                    return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
                }

                @Override
                public int startPosition() {
                    return spans.startPosition();
                }

                @Override
                public int endPosition() {
                    // NOTE(review): presumably Spans end positions are exclusive while
                    // MatchesIterator end positions are inclusive, hence the -1 - confirm.
                    return spans.endPosition() - 1;
                }

                @Override
                public int startOffset() throws IOException {
                    if (innerTermCount == 0) {
                        collectInnerTerms();
                    }
                    // Terms are sorted by position, so the first one starts the span.
                    return innerTerms[0].startOffset;
                }

                @Override
                public int endOffset() throws IOException {
                    if (innerTermCount == 0) {
                        collectInnerTerms();
                    }
                    // Terms are sorted by position, so the last one ends the span.
                    return innerTerms[innerTermCount - 1].endOffset;
                }

                @Override
                public MatchesIterator getSubMatches() throws IOException {
                    if (innerTermCount == 0) {
                        collectInnerTerms();
                    }
                    // Iterates over the leaf terms collected for the current span.
                    return new MatchesIterator() {

                        // Index into innerTerms; -1 until next() is first called.
                        int upto = -1;

                        @Override
                        public boolean next() throws IOException {
                            upto++;
                            return upto < innerTermCount;
                        }

                        @Override
                        public int startPosition() {
                            return innerTerms[upto].position;
                        }

                        @Override
                        public int endPosition() {
                            return innerTerms[upto].position;
                        }

                        @Override
                        public int startOffset() throws IOException {
                            return innerTerms[upto].startOffset;
                        }

                        @Override
                        public int endOffset() throws IOException {
                            return innerTerms[upto].endOffset;
                        }

                        @Override
                        public MatchesIterator getSubMatches() throws IOException {
                            return null;
                        }

                        @Override
                        public Query getQuery() {
                            return new TermQuery(innerTerms[upto].term);
                        }
                    };
                }

                @Override
                public Query getQuery() {
                    return SpanWeight.this.getQuery();
                }

                /** Collect the leaf terms of the current span and order them by position. */
                void collectInnerTerms() throws IOException {
                    termCollector.reset();
                    spans.collect(termCollector);
                    // comparingInt avoids boxing each position into an Integer.
                    Arrays.sort(innerTerms, 0, innerTermCount,
                            Comparator.comparingInt((TermMatch match) -> match.position));
                }
            };
        });
    }
}