org.apache.lucene.facet.DrillSideways.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.facet.DrillSideways.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.ThreadInterruptedException;

/**
 * Computes drill down and sideways counts for the provided
 * {@link DrillDownQuery}.  Drill sideways counts include
 * alternative values/aggregates for the drill-down
 * dimensions so that a dimension does not disappear after
 * the user drills down into it.
 * <p> Use one of the {@code search}
 * methods to do the search, and then get the hits and facet
 * results from the returned {@link DrillSidewaysResult}.
 * <p><b>NOTE</b>: this allocates one {@link
 * FacetsCollector} for each drill-down, plus one.  If your
 * index has a high number of facet labels then this will
 * multiply your memory usage.
 *
 * @lucene.experimental
 */
public class DrillSideways {

    /**
     * {@link IndexSearcher} passed to constructor; all searches
     * issued by this class run against it.
     */
    protected final IndexSearcher searcher;

    /**
     * {@link TaxonomyReader} passed to constructor; can be null
     * (the constructor taking only a {@link SortedSetDocValuesReaderState}
     * passes null here).
     */
    protected final TaxonomyReader taxoReader;

    /**
     * {@link SortedSetDocValuesReaderState} passed to
     * constructor; can be null.
     */
    protected final SortedSetDocValuesReaderState state;

    /**
     * {@link FacetsConfig} passed to constructor.
     */
    protected final FacetsConfig config;

    // Only used for multi-threaded search; null unless the constructor
    // taking an ExecutorService was given one.
    private final ExecutorService executor;

    /**
     * Create a new {@code DrillSideways} instance that counts facets
     * through a {@link TaxonomyReader}. No sorted-set state and no
     * executor are configured.
     *
     * @param searcher   searcher used to run the queries
     * @param config     facets configuration
     * @param taxoReader taxonomy reader used for counting
     */
    public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) {
        this(searcher, config, taxoReader, null);
    }

    /**
     * Create a new {@code DrillSideways} instance, assuming the categories were
     * indexed with {@link SortedSetDocValuesFacetField}. No taxonomy reader and
     * no executor are configured.
     *
     * @param searcher searcher used to run the queries
     * @param config   facets configuration
     * @param state    sorted-set doc values reader state used for counting
     */
    public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) {
        this(searcher, config, null, state);
    }

    /**
     * Create a new {@code DrillSideways} instance, where some
     * dimensions were indexed with {@link
     * SortedSetDocValuesFacetField} and others were indexed
     * with {@link FacetField}. No executor is configured.
     *
     * @param searcher   searcher used to run the queries
     * @param config     facets configuration
     * @param taxoReader taxonomy reader; can be null
     * @param state      sorted-set doc values reader state; can be null
     */
    public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
            SortedSetDocValuesReaderState state) {
        this(searcher, config, taxoReader, state, null);
    }

    /**
     * Create a new {@code DrillSideways} instance, where some
     * dimensions were indexed with {@link
     * SortedSetDocValuesFacetField} and others were indexed
     * with {@link FacetField}.
     * <p>
     * Use this constructor to use the concurrent implementation and/or the CollectorManager
     *
     * @param searcher   searcher used to run the queries
     * @param config     facets configuration
     * @param taxoReader taxonomy reader; can be null
     * @param state      sorted-set doc values reader state; can be null
     * @param executor   executor used by the multi-threaded search paths; can be null,
     *                   in which case the score- and sort-based searches use the
     *                   single-threaded path
     */
    public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
            SortedSetDocValuesReaderState state, ExecutorService executor) {
        // Plain field capture; no argument is validated here.
        this.searcher = searcher;
        this.config = config;
        this.taxoReader = taxoReader;
        this.state = state;
        this.executor = executor;
    }

    /**
     * Turns the collected hits into a {@link Facets} instance. Subclass can
     * override to customize the per-dim Facets impl.
     * <p>
     * Taxonomy-based counting is used when a {@link TaxonomyReader} was
     * supplied; otherwise counting goes through the sorted-set state.
     *
     * @param drillDowns        collector holding the drill-down hits
     * @param drillSideways     one collector per drill-sideways dimension, or
     *                          null when there is nothing to count sideways
     * @param drillSidewaysDims dimension names, parallel to {@code drillSideways};
     *                          not read when {@code drillSideways} is null
     * @return the drill-down facets alone when no sideways facets were built,
     *         otherwise a {@link MultiFacets} combining both
     * @throws IOException declared for the facet-count constructors invoked here
     */
    protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways,
            String[] drillSidewaysDims) throws IOException {

        final boolean useTaxonomy = taxoReader != null;

        final Facets drillDownFacets;
        if (useTaxonomy) {
            drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns);
        } else {
            drillDownFacets = new SortedSetDocValuesFacetCounts(state, drillDowns);
        }

        // No sideways collectors means there is nothing to merge in.
        if (drillSideways == null || drillSideways.length == 0) {
            return drillDownFacets;
        }

        final Map<String, Facets> facetsByDim = new HashMap<>();
        for (int dimIndex = 0; dimIndex < drillSideways.length; dimIndex++) {
            final String dim = drillSidewaysDims[dimIndex];
            final Facets sidewaysFacets;
            if (useTaxonomy) {
                sidewaysFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillSideways[dimIndex]);
            } else {
                sidewaysFacets = new SortedSetDocValuesFacetCounts(state, drillSideways[dimIndex]);
            }
            facetsByDim.put(dim, sidewaysFacets);
        }
        return new MultiFacets(facetsByDim, drillDownFacets);
    }

    /**
     * Search, collecting hits with a {@link Collector}, and
     * computing drill down and sideways counts.
     * <p>
     * The {@code hits} field of the returned result is always null here;
     * read the hits from {@code hitCollector} instead.
     *
     * @param query        drill-down query to execute
     * @param hitCollector collector receiving the matching documents
     * @return result carrying the combined drill-down and sideways facets
     * @throws IOException if the underlying search fails
     */
    public DrillSidewaysResult search(DrillDownQuery query, Collector hitCollector) throws IOException {

        final Map<String, Integer> dims = query.getDims();
        final FacetsCollector mainFacetsCollector = new FacetsCollector();

        if (dims.isEmpty()) {
            // Nothing was drilled into, so there are no sideways counts to
            // compute: a single pass feeding both collectors is enough.
            final Collector hitsAndFacets = MultiCollector.wrap(hitCollector, mainFacetsCollector);
            searcher.search(query, hitsAndFacets);
            final Facets facets = buildFacetsResult(mainFacetsCollector, null, null);
            return new DrillSidewaysResult(facets, null);
        }

        // TODO: we could optimize the pure-browse case (no base query) by
        // making a custom scorer instead of matching all docs:
        final Query requestedBase = query.getBaseQuery();
        final Query baseQuery = (requestedBase != null) ? requestedBase : new MatchAllDocsQuery();
        final Query[] drillDownQueries = query.getDrillDownQueries();

        // One facets collector per drill-down dimension.
        final int dimCount = dims.size();
        final FacetsCollector[] sidewaysCollectors = new FacetsCollector[dimCount];
        for (int slot = 0; slot < dimCount; slot++) {
            sidewaysCollectors[slot] = new FacetsCollector();
        }

        final DrillSidewaysQuery sidewaysQuery = new DrillSidewaysQuery(baseQuery, mainFacetsCollector,
                sidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce());

        Collector effectiveCollector = hitCollector;
        if (!hitCollector.scoreMode().needsScores()) {
            // this is a horrible hack in order to make sure IndexSearcher will not
            // attempt to cache the DrillSidewaysQuery
            effectiveCollector = new FilterCollector(hitCollector) {
                @Override
                public ScoreMode scoreMode() {
                    return ScoreMode.COMPLETE;
                }
            };
        }
        searcher.search(sidewaysQuery, effectiveCollector);

        final String[] dimNames = dims.keySet().toArray(new String[0]);
        final Facets facets = buildFacetsResult(mainFacetsCollector, sidewaysCollectors, dimNames);
        return new DrillSidewaysResult(facets, null);
    }

    /**
     * Search, sorting by {@link Sort}, and computing
     * drill down and sideways counts.
     * <p>
     * When {@code sort} is null this delegates to the score-sorted
     * {@link #search(ScoreDoc, DrillDownQuery, int)}.
     *
     * @param query       drill-down query to execute
     * @param filter      optional extra filter; when non-null the query is wrapped
     *                    in a new {@link DrillDownQuery} built from it
     * @param after       hit to resume after; passed through to the collector
     * @param topN        maximum number of hits to return
     * @param sort        sort order, or null to sort by score
     * @param doDocScores whether to populate scores on the sorted hits
     * @return facets plus the top hits
     * @throws IOException if the underlying search fails
     */
    public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort,
            boolean doDocScores) throws IOException {
        final DrillDownQuery effectiveQuery = (filter == null) ? query : new DrillDownQuery(config, filter, query);

        if (sort == null) {
            return search(after, effectiveQuery, topN);
        }

        final int maxDoc = searcher.getIndexReader().maxDoc();
        // the collector does not allow numHits = 0
        final int fTopN = Math.min(topN, maxDoc == 0 ? 1 : maxDoc);

        final Facets facets;
        final TopFieldDocs sortedHits;

        if (executor == null) {
            // Single-threaded: one collector gathers everything.
            final TopFieldCollector hitCollector = TopFieldCollector.create(sort, fTopN, after,
                    Integer.MAX_VALUE);
            final DrillSidewaysResult sequential = search(effectiveQuery, hitCollector);
            facets = sequential.facets;
            sortedHits = hitCollector.topDocs();
        } else {
            // We have an executor, so use the multi-threaded version.
            final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

                @Override
                public TopFieldCollector newCollector() throws IOException {
                    return TopFieldCollector.create(sort, fTopN, after, Integer.MAX_VALUE);
                }

                @Override
                public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
                    final TopFieldDocs[] perCollector = new TopFieldDocs[collectors.size()];
                    int next = 0;
                    for (TopFieldCollector collector : collectors) {
                        perCollector[next] = collector.topDocs();
                        next++;
                    }
                    return TopDocs.merge(sort, topN, perCollector);
                }

            };
            final ConcurrentDrillSidewaysResult<TopFieldDocs> concurrent = search(effectiveQuery, manager);
            facets = concurrent.facets;
            sortedHits = concurrent.collectorResult;
        }

        if (doDocScores) {
            TopFieldCollector.populateScores(sortedHits.scoreDocs, searcher, effectiveQuery);
        }
        return new DrillSidewaysResult(facets, sortedHits);
    }

    /**
     * Search, sorting by score, and computing
     * drill down and sideways counts.
     * <p>
     * Equivalent to {@link #search(ScoreDoc, DrillDownQuery, int)} with a
     * null {@code after}.
     *
     * @param query drill-down query to execute
     * @param topN  maximum number of hits to return
     * @return facets plus the top hits
     * @throws IOException if the underlying search fails
     */
    public DrillSidewaysResult search(DrillDownQuery query, int topN) throws IOException {
        return search(null, query, topN);
    }

    /**
     * Search, sorting by score, and computing
     * drill down and sideways counts.
     * <p>
     * The requested {@code topN} is clamped to the index's {@code maxDoc}
     * (with a floor of 1 for an empty index) before any collector is
     * created, in both the multi-threaded and the single-threaded path.
     *
     * @param after hit to resume after; can be null to start from the top
     * @param query drill-down query to execute
     * @param topN  maximum number of hits to return
     * @return facets plus the top hits
     * @throws IOException if the underlying search fails
     */
    public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException {
        int limit = searcher.getIndexReader().maxDoc();
        if (limit == 0) {
            limit = 1; // the collector does not allow numHits = 0
        }
        // Never ask a collector for more hits than the index can contain, so
        // that a huge topN (e.g. Integer.MAX_VALUE) does not reach the collector.
        final int fTopN = Math.min(topN, limit);

        if (executor != null) { // We have an executor, so use the multi-threaded version

            final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager = new CollectorManager<TopScoreDocCollector, TopDocs>() {

                @Override
                public TopScoreDocCollector newCollector() throws IOException {
                    return TopScoreDocCollector.create(fTopN, after, Integer.MAX_VALUE);
                }

                @Override
                public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
                    final TopDocs[] topDocs = new TopDocs[collectors.size()];
                    int pos = 0;
                    for (TopScoreDocCollector collector : collectors) {
                        topDocs[pos++] = collector.topDocs();
                    }
                    return TopDocs.merge(topN, topDocs);
                }

            };
            ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager);
            return new DrillSidewaysResult(r.facets, r.collectorResult);

        } else {

            // Use the clamped fTopN here too (the original passed the raw topN),
            // consistent with the multi-threaded branch and the sorted overload.
            TopScoreDocCollector hitCollector = TopScoreDocCollector.create(fTopN, after, Integer.MAX_VALUE);
            DrillSidewaysResult r = search(query, hitCollector);
            return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
        }
    }

    /**
     * Override this and return true if your collector
     * (e.g., {@code ToParentBlockJoinCollector}) expects all
     * sub-scorers to be positioned on the document being
     * collected.  This will cause some performance loss;
     * default is false.
     * <p>
     * The value is handed to the {@code DrillSidewaysQuery} built by
     * {@link #search(DrillDownQuery, Collector)}.
     *
     * @return false in this base implementation
     */
    protected boolean scoreSubDocsAtOnce() {
        return false;
    }

    /**
     * Result of a drill sideways search, including the
     * {@link Facets} and {@link TopDocs}.
     */
    public static class DrillSidewaysResult {
        /**
         * Combined drill down and sideways results.
         */
        public final Facets facets;

        /**
         * Hits. Can be null: the {@link Collector}-based and
         * {@link CollectorManager}-based searches pass null here.
         */
        public final TopDocs hits;

        /**
         * Sole constructor.
         *
         * @param facets combined drill down and sideways facets
         * @param hits   top hits; can be null
         */
        public DrillSidewaysResult(Facets facets, TopDocs hits) {
            this.facets = facets;
            this.hits = hits;
        }
    }

    /**
     * Task that runs one query with one {@link CollectorManager} and tags the
     * reduced result with a position, so the caller can tell which query a
     * result belongs to.
     */
    private static class CallableCollector implements Callable<CallableResult> {

        // Slot identifying this task; the CollectorManager-based search uses -1
        // for the main query and the dimension index for each sideways query.
        private final int pos;
        private final IndexSearcher searcher;
        private final Query query;
        private final CollectorManager<?, ?> collectorManager;

        private CallableCollector(int pos, IndexSearcher searcher, Query query,
                CollectorManager<?, ?> collectorManager) {
            this.pos = pos;
            this.searcher = searcher;
            this.query = query;
            this.collectorManager = collectorManager;
        }

        /**
         * Executes the search and wraps whatever the collector manager
         * reduced to, together with {@code pos}.
         */
        @Override
        public CallableResult call() throws Exception {
            return new CallableResult(pos, searcher.search(query, collectorManager));
        }
    }

    /**
     * Pairs the untyped result of a {@link CallableCollector} with the
     * position the task was created with.
     */
    private static class CallableResult {

        // Position copied from the task that produced this result.
        private final int pos;
        // Reduced collector-manager result; callers cast it to the expected type.
        private final Object result;

        private CallableResult(int pos, Object result) {
            this.pos = pos;
            this.result = result;
        }
    }

    /**
     * Builds a copy of {@code query} (same base query) that keeps every
     * drill-down dimension except {@code excludedDimension}.
     *
     * @param query             query whose base query and dimensions are copied
     * @param queries           drill-down queries, indexed by the positions stored
     *                          in {@code query.getDims()}
     * @param excludedDimension dimension to leave out
     * @return the reduced query, or null when the copy ends up with as many
     *         dimensions as there are entries in {@code queries} (i.e. nothing
     *         was actually excluded)
     */
    private DrillDownQuery getDrillDownQuery(final DrillDownQuery query, Query[] queries,
            final String excludedDimension) {
        final DrillDownQuery reduced = new DrillDownQuery(config, query.getBaseQuery());
        for (Map.Entry<String, Integer> dimAndSlot : query.getDims().entrySet()) {
            final String dim = dimAndSlot.getKey();
            if (dim.equals(excludedDimension)) {
                continue;
            }
            reduced.add(dim, queries[dimAndSlot.getValue()]);
        }
        if (reduced.getDims().size() == queries.length) {
            return null;
        }
        return reduced;
    }

    /**
     * Runs a search, using a {@link CollectorManager} to gather and merge search results.
     * <p>
     * One search is issued for the full drill-down query (collecting facets and
     * hits together) plus one facets-only search per drill-down dimension, each
     * with that dimension removed. When this instance was created with an
     * {@link ExecutorService} the searches are submitted to it; otherwise they
     * run one after another in the calling thread, so this method also works on
     * instances built without an executor.
     *
     * @param query               drill-down query to execute
     * @param hitCollectorManager manager producing and reducing the hit collectors
     * @param <R>                 type of the reduced hit result
     * @return result carrying the combined facets and the reduced hit result;
     *         its {@code hits} field is always null
     * @throws IOException                if a search run in the calling thread fails
     * @throws ThreadInterruptedException if interrupted while waiting for the executor
     * @throws RuntimeException           wrapping the {@link ExecutionException} when a
     *                                    search submitted to the executor fails
     */
    public <R> ConcurrentDrillSidewaysResult<R> search(final DrillDownQuery query,
            final CollectorManager<?, R> hitCollectorManager) throws IOException {

        final Map<String, Integer> drillDownDims = query.getDims();
        final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);

        // Add the main DrillDownQuery; pos -1 marks it as "not a sideways dimension"
        callableCollectors.add(new CallableCollector(-1, searcher, query,
                new MultiCollectorManager(new FacetsCollectorManager(), hitCollectorManager)));
        int i = 0;
        final Query[] filters = query.getDrillDownQueries();
        for (String dim : drillDownDims.keySet()) {
            callableCollectors.add(new CallableCollector(i++, searcher, getDrillDownQuery(query, filters, dim),
                    new FacetsCollectorManager()));
        }

        // Results in task order: index 0 is always the main query.
        final List<CallableResult> results = new ArrayList<>(callableCollectors.size());
        if (executor == null) {
            // No executor configured: run the same tasks sequentially instead of
            // failing with a NullPointerException.
            for (CallableCollector task : callableCollectors) {
                try {
                    results.add(task.call());
                } catch (IOException | RuntimeException e) {
                    throw e;
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        } else {
            try {
                // Run the query pool; invokeAll returns once every task has completed
                for (Future<CallableResult> future : executor.invokeAll(callableCollectors)) {
                    results.add(future.get());
                }
            } catch (InterruptedException e) {
                throw new ThreadInterruptedException(e);
            } catch (ExecutionException e) {
                throw new RuntimeException(e);
            }
        }

        // Extract the results. The main task used a MultiCollectorManager, whose
        // reduced value is an array: [facets collector, hit result].
        final Object[] mainResults = (Object[]) results.get(0).result;
        final FacetsCollector mainFacetsCollector = (FacetsCollector) mainResults[0];
        // Safe as long as hitCollectorManager reduces to R, which its type guarantees.
        @SuppressWarnings("unchecked")
        final R collectorResult = (R) mainResults[1];

        final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()];
        for (CallableResult result : results.subList(1, results.size())) {
            facetsCollectors[result.pos] = (FacetsCollector) result.result;
        }
        // Fill the null results with the mainFacetsCollector
        for (i = 0; i < facetsCollectors.length; i++) {
            if (facetsCollectors[i] == null) {
                facetsCollectors[i] = mainFacetsCollector;
            }
        }

        // build the facets and return the result
        return new ConcurrentDrillSidewaysResult<>(buildFacetsResult(mainFacetsCollector, facetsCollectors,
                drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null, collectorResult);
    }

    /**
     * Result of a concurrent drill sideways search, including the
     * {@link Facets} and {@link TopDocs}, plus the value the caller's
     * {@link CollectorManager} reduced to.
     *
     * @param <R> type of the reduced collector result
     */
    public static class ConcurrentDrillSidewaysResult<R> extends DrillSidewaysResult {

        /** The merged search results */
        public final R collectorResult;

        /**
         * Sole constructor.
         *
         * @param facets          combined drill down and sideways facets
         * @param hits            top hits; the CollectorManager-based search passes null
         * @param collectorResult merged result produced by the collector manager
         */
        ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) {
            super(facets, hits);
            this.collectorResult = collectorResult;
        }
    }
}