Java tutorial: Elasticsearch TopChildrenQuery (org.elasticsearch.index.search.child)
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.search.child;

import com.carrotsearch.hppc.IntObjectOpenHashMap;
import com.carrotsearch.hppc.ObjectObjectOpenHashMap;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.search.EmptyScorer;
import org.elasticsearch.common.recycler.Recycler;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.plain.ParentChildIndexFieldData;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Set;

/**
 * A query that evaluates the top matching child documents (based on the score) in order to determine which
 * parent documents to return. This query tries to find just enough child documents to return the requested
 * number of parent documents (or fewer if no more child documents can be found).
 * <p/>
 * This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
 * child documents. The resulting child documents are mapped to their parent documents, including the aggregated child scores.
 * If not enough parent documents could be resolved, a subsequent round is executed that requests the previously requested
 * number of documents times incremental_factor. This logic repeats until enough parent documents are resolved or until no more
 * child documents are available.
 * <p/>
 * This query is most of the time faster than the {@link ChildrenQuery}. Usually enough parent documents can be returned
 * in the first child document query round.
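 * <p/>
 * Illustrative example (figures invented for this tutorial, not part of the original documentation): with
 * from=0, size=10, factor=5 and incremental_factor=2, the first round searches the top 10 * 5 = 50 child
 * documents. If they resolve to fewer than 10 parent documents and more child hits exist, the next round
 * searches the top 50 * 2 = 100 child documents, then 200, and so on, capped at the total number of child
 * hits. A construction sketch and a worked scoring example follow the class listing below.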
 */
public class TopChildrenQuery extends Query {

    private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();

    private final ParentChildIndexFieldData parentChildIndexFieldData;
    private final CacheRecycler cacheRecycler;
    private final String parentType;
    private final String childType;
    private final ScoreType scoreType;
    private final int factor;
    private final int incrementalFactor;
    private final Query originalChildQuery;
    private final Filter nonNestedDocsFilter;

    // This field will hold the rewritten form of originalChildQuery, so that we can reuse it
    private Query rewrittenChildQuery;
    private IndexReader rewriteIndexReader;

    // Note, the query is expected to already be filtered to only child type docs
    public TopChildrenQuery(ParentChildIndexFieldData parentChildIndexFieldData, Query childQuery, String childType, String parentType,
                            ScoreType scoreType, int factor, int incrementalFactor, CacheRecycler cacheRecycler, Filter nonNestedDocsFilter) {
        this.parentChildIndexFieldData = parentChildIndexFieldData;
        this.originalChildQuery = childQuery;
        this.childType = childType;
        this.parentType = parentType;
        this.scoreType = scoreType;
        this.factor = factor;
        this.incrementalFactor = incrementalFactor;
        this.cacheRecycler = cacheRecycler;
        this.nonNestedDocsFilter = nonNestedDocsFilter;
    }

    // Rewrite invocation logic:
    // 1) query_then_fetch (default): Rewrite is executed as part of the createWeight invocation, when searching for child docs.
    // 2) dfs_query_then_fetch: First rewrite and then createWeight is executed. During the query phase the rewrite isn't
    //    executed any more because searchContext#queryRewritten() returns true.
    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        if (rewrittenChildQuery == null) {
            rewrittenChildQuery = originalChildQuery.rewrite(reader);
            rewriteIndexReader = reader;
        }
        // We can always return the current instance, and we can do this b/c the child query is executed separately
        // before the main query (other scope) in a different IS#search() invocation than the main query.
        // In fact we only need to override the rewrite method for the dfs phase, to also get global document
        // frequencies for the child query.
        return this;
    }

    @Override
    public void extractTerms(Set<Term> terms) {
        rewrittenChildQuery.extractTerms(terms);
    }

    @Override
    public Weight createWeight(IndexSearcher searcher) throws IOException {
        Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs = cacheRecycler.hashMap(-1);
        SearchContext searchContext = SearchContext.current();

        int parentHitsResolved;
        int requestedDocs = (searchContext.from() + searchContext.size());
        if (requestedDocs <= 0) {
            requestedDocs = 1;
        }
        int numChildDocs = requestedDocs * factor;

        Query childQuery;
        if (rewrittenChildQuery == null) {
            childQuery = rewrittenChildQuery = searcher.rewrite(originalChildQuery);
        } else {
            assert rewriteIndexReader == searcher.getIndexReader();
            childQuery = rewrittenChildQuery;
        }

        IndexSearcher indexSearcher = new IndexSearcher(searcher.getIndexReader());
        indexSearcher.setSimilarity(searcher.getSimilarity());
        while (true) {
            parentDocs.v().clear();
            TopDocs topChildDocs = indexSearcher.search(childQuery, numChildDocs);
            try {
                parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext, parentDocs);
            } catch (Exception e) {
                throw new IOException(e);
            }

            // check if we found enough docs, if so, break
            if (parentHitsResolved >= requestedDocs) {
                break;
            }
            // if we did not find enough docs, check if it makes sense to search further
            if (topChildDocs.totalHits <= numChildDocs) {
                break;
            }
            // if not, update numDocs, and search again
            numChildDocs *= incrementalFactor;
            if (numChildDocs > topChildDocs.totalHits) {
                numChildDocs = topChildDocs.totalHits;
            }
        }

        ParentWeight parentWeight = new ParentWeight(rewrittenChildQuery.createWeight(searcher), parentDocs);
        searchContext.addReleasable(parentWeight);
        return parentWeight;
    }

    int resolveParentDocuments(TopDocs topDocs, SearchContext context, Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) throws Exception {
        int parentHitsResolved = 0;
        Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>> parentDocsPerReader = cacheRecycler
                .hashMap(context.searcher().getIndexReader().leaves().size());
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
            AtomicReaderContext subContext = context.searcher().getIndexReader().leaves().get(readerIndex);
            BytesValues.WithOrdinals parentValues = parentChildIndexFieldData.load(subContext).getBytesValues(parentType);
            int subDoc = scoreDoc.doc - subContext.docBase;

            // find the parent id
            parentValues.setDocument(subDoc);
            BytesRef parentId = parentValues.nextValue();
            if (parentId == null) {
                // no parent found
                continue;
            }
            // now go over and find the parent doc Id and reader tuple
            for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
                AtomicReader indexReader = atomicReaderContext.reader();
                FixedBitSet nonNestedDocs = null;
                if (nonNestedDocsFilter != null) {
                    nonNestedDocs = (FixedBitSet) nonNestedDocsFilter.getDocIdSet(atomicReaderContext, indexReader.getLiveDocs());
                }

                Terms terms = indexReader.terms(UidFieldMapper.NAME);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator(null);
                if (!termsEnum.seekExact(Uid.createUidAsBytes(parentType, parentId))) {
                    continue;
                }
                DocsEnum docsEnum = termsEnum.docs(indexReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
                int parentDocId = docsEnum.nextDoc();
                if (nonNestedDocs != null && !nonNestedDocs.get(parentDocId)) {
                    parentDocId = nonNestedDocs.nextSetBit(parentDocId);
                }
                if (parentDocId != DocsEnum.NO_MORE_DOCS) {
                    // we found a match,
                    // add it and break
                    Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs = parentDocsPerReader.v().get(indexReader.getCoreCacheKey());
                    if (readerParentDocs == null) {
                        readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc());
                        parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs);
                    }

                    ParentDoc parentDoc = readerParentDocs.v().get(parentDocId);
                    if (parentDoc == null) {
                        parentHitsResolved++; // we have a hit on a parent
                        parentDoc = new ParentDoc();
                        parentDoc.docId = parentDocId;
                        parentDoc.count = 1;
                        parentDoc.maxScore = scoreDoc.score;
                        parentDoc.sumScores = scoreDoc.score;
                        readerParentDocs.v().put(parentDocId, parentDoc);
                    } else {
                        parentDoc.count++;
                        parentDoc.sumScores += scoreDoc.score;
                        if (scoreDoc.score > parentDoc.maxScore) {
                            parentDoc.maxScore = scoreDoc.score;
                        }
                    }
                }
            }
        }

        boolean[] states = parentDocsPerReader.v().allocated;
        Object[] keys = parentDocsPerReader.v().keys;
        Object[] values = parentDocsPerReader.v().values;
        for (int i = 0; i < states.length; i++) {
            if (states[i]) {
                Recycler.V<IntObjectOpenHashMap<ParentDoc>> value = (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i];
                ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class);
                Arrays.sort(_parentDocs, PARENT_DOC_COMP);
                parentDocs.v().put(keys[i], _parentDocs);
                Releasables.release(value);
            }
        }
        Releasables.release(parentDocsPerReader);

        return parentHitsResolved;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || obj.getClass() != this.getClass()) {
            return false;
        }

        TopChildrenQuery that = (TopChildrenQuery) obj;
        if (!originalChildQuery.equals(that.originalChildQuery)) {
            return false;
        }
        if (!childType.equals(that.childType)) {
            return false;
        }
        if (incrementalFactor != that.incrementalFactor) {
            return false;
        }
        if (getBoost() != that.getBoost()) {
            return false;
        }
        return true;
    }

    @Override
    public int hashCode() {
        int result = originalChildQuery.hashCode();
        result = 31 * result + parentType.hashCode();
        result = 31 * result + incrementalFactor;
        result = 31 * result + Float.floatToIntBits(getBoost());
        return result;
    }

    public String toString(String field) {
        StringBuilder sb = new StringBuilder();
        sb.append("score_child[").append(childType).append("/").append(parentType).append("](")
                .append(originalChildQuery.toString(field)).append(')');
        sb.append(ToStringUtils.boost(getBoost()));
        return sb.toString();
    }

    private class ParentWeight extends Weight implements Releasable {

        private final Weight queryWeight;
        private final Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs;

        public ParentWeight(Weight queryWeight, Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) throws IOException {
            this.queryWeight = queryWeight;
            this.parentDocs = parentDocs;
        }

        public Query getQuery() {
            return TopChildrenQuery.this;
        }

        @Override
        public float getValueForNormalization() throws IOException {
            float sum = queryWeight.getValueForNormalization();
            sum *= getBoost() * getBoost();
            return sum;
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            // Nothing to normalize
        }

        @Override
        public boolean release() throws ElasticsearchException {
            Releasables.release(parentDocs);
            return true;
        }

        @Override
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
            ParentDoc[] readerParentDocs = parentDocs.v().get(context.reader().getCoreCacheKey());
            if (readerParentDocs != null) {
                if (scoreType == ScoreType.MAX) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.maxScore;
                        }
                    };
                } else if (scoreType == ScoreType.AVG) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.sumScores / doc.count;
                        }
                    };
                } else if (scoreType == ScoreType.SUM) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.sumScores;
                        }
                    };
                }
                throw new ElasticsearchIllegalStateException("No support for score type [" + scoreType + "]");
            }
            return new EmptyScorer(this);
        }

        @Override
        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
            return new Explanation(getBoost(), "not implemented yet...");
        }
    }

    private static abstract class ParentScorer extends Scorer {

        private final ParentDoc spare = new ParentDoc();
        protected final ParentDoc[] docs;
        protected ParentDoc doc = spare;
        private int index = -1;

        ParentScorer(ParentWeight weight, ParentDoc[] docs) throws IOException {
            super(weight);
            this.docs = docs;
            spare.docId = -1;
            spare.count = -1;
        }

        @Override
        public final int docID() {
            return doc.docId;
        }

        @Override
        public final int advance(int target) throws IOException {
            return slowAdvance(target);
        }

        @Override
        public final int nextDoc() throws IOException {
            if (++index >= docs.length) {
                doc = spare;
                doc.count = 0;
                return (doc.docId = NO_MORE_DOCS);
            }
            return (doc = docs[index]).docId;
        }

        @Override
        public final int freq() throws IOException {
            return doc.count; // The number of matching child docs, which is propagated to the parent
        }

        @Override
        public final long cost() {
            return docs.length;
        }
    }

    private static class ParentDocComparator implements Comparator<ParentDoc> {
        @Override
        public int compare(ParentDoc o1, ParentDoc o2) {
            return o1.docId - o2.docId;
        }
    }

    private static class ParentDoc {
        public int docId;
        public int count;
        public float maxScore = Float.NaN;
        public float sumScores = 0;
    }
}
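In Elasticsearch this query is normally built by the top_children query parser during the query phase rather than instantiated by hand. The sketch below only shows how the constructor arguments map onto the behaviour described in the class Javadoc; it assumes the surrounding search request already supplies the field data, the cache recycler and a current SearchContext, and every variable name is a hypothetical placeholder rather than a documented API.

// A minimal sketch, not a runnable standalone program: parentChildIndexFieldData, cacheRecycler and
// searchContext are placeholders for objects the search request would normally provide.
Query childQuery = new TermQuery(new Term("tag", "something")); // already filtered to child-type docs
TopChildrenQuery topChildren = new TopChildrenQuery(
        parentChildIndexFieldData, // fielddata used to map child hits to parent ids
        childQuery,
        "blog_tag",                // child type (placeholder name)
        "blog",                    // parent type (placeholder name)
        ScoreType.MAX,             // score each parent by its best-matching child
        5,                         // factor: first round asks for (from + size) * 5 child docs
        2,                         // incremental_factor: each further round multiplies the request by 2
        cacheRecycler,             // recycler for the temporary parent-doc maps
        null);                     // nonNestedDocsFilter is only needed when nested docs are present
// createWeight(...) runs the child searches and resolves parents, after which the query scores
// parent documents like any other Lucene query, e.g.:
// TopDocs parents = searchContext.searcher().search(topChildren, searchContext.size());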
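To make the three score modes concrete, here is a worked example of how a ParentDoc accumulates child scores; the numbers are invented for illustration.

// Suppose one parent document is matched by three child documents scoring 0.4, 0.7 and 0.1.
// After resolveParentDocuments(...) its ParentDoc holds: count = 3, maxScore = 0.7, sumScores = 1.2.
// The anonymous ParentScorer then reports, depending on the ScoreType passed to the constructor:
//   MAX -> doc.maxScore              = 0.7
//   AVG -> doc.sumScores / doc.count = 1.2f / 3 = 0.4
//   SUM -> doc.sumScores             = 1.2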