io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollector.java Source code

Introduction

Here is the source code for io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollector.java
Source

/*
 * Licensed to Crate under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.  Crate licenses this file
 * to you under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * However, if you have executed another commercial license agreement
 * with Crate these terms will supersede the license and you may use the
 * software solely pursuant to the terms of the relevant commercial
 * agreement.
 */

package io.crate.execution.engine.collect.collectors;

import com.google.common.collect.Iterables;
import io.crate.data.Input;
import io.crate.data.Row;
import io.crate.execution.engine.distribution.merge.KeyIterable;
import io.crate.expression.reference.doc.lucene.CollectorContext;
import io.crate.expression.reference.doc.lucene.LuceneCollectorExpression;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldCollector;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.lucene.MinimumScoreCollector;
import org.elasticsearch.index.shard.ShardId;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;

public class LuceneOrderedDocCollector extends OrderedDocCollector {

    private static final Logger LOGGER = Loggers.getLogger(LuceneOrderedDocCollector.class);

    private final Query query;
    private final Float minScore;
    private final boolean doDocsScores;
    private final int batchSize;
    private final CollectorContext collectorContext;
    private final Function<FieldDoc, Query> searchAfterQueryOptimize;
    private final Sort sort;
    private final Collection<? extends LuceneCollectorExpression<?>> expressions;
    private final ScoreDocRowFunction rowFunction;
    private final DummyScorer scorer;
    private final IndexSearcher searcher;

    @Nullable
    private volatile FieldDoc lastDoc = null;

    public LuceneOrderedDocCollector(ShardId shardId, IndexSearcher searcher, Query query, Float minScore,
            boolean doDocsScores, int batchSize, CollectorContext collectorContext,
            Function<FieldDoc, Query> searchAfterQueryOptimize, Sort sort, List<? extends Input<?>> inputs,
            Collection<? extends LuceneCollectorExpression<?>> expressions) {
        super(shardId);
        this.searcher = searcher;
        this.query = query;
        this.minScore = minScore;
        this.doDocsScores = doDocsScores;
        this.batchSize = batchSize;
        this.collectorContext = collectorContext;
        this.searchAfterQueryOptimize = searchAfterQueryOptimize;
        this.sort = sort;
        this.scorer = new DummyScorer();
        this.expressions = expressions;
        this.rowFunction = new ScoreDocRowFunction(searcher.getIndexReader(), inputs, expressions, scorer);
    }

    /**
     * On the first call this will do an initial search and provide {@link #batchSize} number of rows
     * (or less if there aren't more available)
     * </p>
     * On subsequent calls it will return more rows (max {@link #batchSize} or less.
     * These rows are always the rows that come after the last row of the previously returned rows
     * <p/>
     * Basically, calling this function multiple times pages through the shard in batches.
     */
    @Override
    public KeyIterable<ShardId, Row> collect() {
        try {
            if (lastDoc == null) {
                return initialSearch();
            }
            return searchMore();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void close() {
    }

    private KeyIterable<ShardId, Row> initialSearch() throws IOException {
        for (LuceneCollectorExpression<?> expression : expressions) {
            expression.startCollect(collectorContext);
            expression.setScorer(scorer);
        }
        TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, batchSize, true, doDocsScores,
                doDocsScores);
        return doSearch(topFieldCollector, minScore, query);
    }

    private KeyIterable<ShardId, Row> searchMore() throws IOException {
        if (exhausted()) {
            LOGGER.trace("searchMore but EXHAUSTED");
            return empty();
        }
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("searchMore from [{}]", lastDoc);
        }
        TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, batchSize, lastDoc, true, doDocsScores,
                doDocsScores);
        return doSearch(topFieldCollector, minScore, query(lastDoc));
    }

    private KeyIterable<ShardId, Row> doSearch(TopFieldCollector topFieldCollector, Float minScore, Query query)
            throws IOException {
        Collector collector = topFieldCollector;
        if (minScore != null) {
            collector = new MinimumScoreCollector(collector, minScore);
        }
        searcher.search(query, collector);
        return scoreDocToIterable(topFieldCollector.topDocs().scoreDocs);
    }

    private KeyIterable<ShardId, Row> scoreDocToIterable(ScoreDoc[] scoreDocs) {
        exhausted = scoreDocs.length < batchSize;
        if (scoreDocs.length > 0) {
            lastDoc = (FieldDoc) scoreDocs[scoreDocs.length - 1];
        }
        return new KeyIterable<>(shardId(), Iterables.transform(Arrays.asList(scoreDocs), rowFunction));
    }

    private Query query(FieldDoc lastDoc) {
        Query optimizedQuery = searchAfterQueryOptimize.apply(lastDoc);
        if (optimizedQuery == null) {
            return this.query;
        }
        BooleanQuery.Builder searchAfterQuery = new BooleanQuery.Builder();
        searchAfterQuery.add(this.query, BooleanClause.Occur.MUST);
        searchAfterQuery.add(optimizedQuery, BooleanClause.Occur.MUST_NOT);
        return searchAfterQuery.build();
    }
}