com.yahoo.bard.webservice.data.dimension.impl.LuceneSearchProvider.java

Source

// Copyright 2016 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data.dimension.impl;

import com.yahoo.bard.webservice.config.SystemConfig;
import com.yahoo.bard.webservice.config.SystemConfigProvider;
import com.yahoo.bard.webservice.data.cache.HashDataCache.Pair;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.dimension.DimensionField;
import com.yahoo.bard.webservice.data.dimension.DimensionRow;
import com.yahoo.bard.webservice.data.dimension.KeyValueStore;
import com.yahoo.bard.webservice.data.dimension.SearchProvider;
import com.yahoo.bard.webservice.data.dimension.TimeoutException;
import com.yahoo.bard.webservice.logging.RequestLog;
import com.yahoo.bard.webservice.logging.TimedPhase;
import com.yahoo.bard.webservice.util.DimensionStoreKeyUtils;
import com.yahoo.bard.webservice.util.Pagination;
import com.yahoo.bard.webservice.util.SinglePagePagination;
import com.yahoo.bard.webservice.util.Utils;
import com.yahoo.bard.webservice.web.ApiFilter;
import com.yahoo.bard.webservice.web.DefaultFilterOperation;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;
import com.yahoo.bard.webservice.web.FilterOperation;
import com.yahoo.bard.webservice.web.PageNotFoundException;
import com.yahoo.bard.webservice.web.RowLimitReachedException;
import com.yahoo.bard.webservice.web.util.PaginationParameters;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * A {@link SearchProvider} implementation backed by a Lucene index, supporting filtered and paginated
 * lookups of dimension rows.
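 * <p>
 * A minimal usage sketch (hypothetical wiring; {@code myDimension} and {@code myKeyValueStore} stand in for
 * real {@link Dimension} and {@link KeyValueStore} instances):
 * <pre>{@code
 * LuceneSearchProvider provider = new LuceneSearchProvider("/tmp/lucene/myDimension", 10000);
 * provider.setDimension(myDimension);         // must be set before indexing or querying
 * provider.setKeyValueStore(myKeyValueStore); // also seeds the cardinality key if absent
 * TreeSet<DimensionRow> rows = provider.findAllOrderedDimensionRows();
 * }</pre>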
 */
public class LuceneSearchProvider implements SearchProvider {
    private static final Logger LOG = LoggerFactory.getLogger(LuceneSearchProvider.class);

    private static final Analyzer LUCENE_ANALYZER = new StandardAnalyzer();
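    // RAM buffer size, in MB, passed to IndexWriterConfig#setRAMBufferSizeMB when writing to the index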
    private static final double BUFFER_SIZE = 48;

    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
    private final String luceneIndexPath;

    private static final SystemConfig SYSTEM_CONFIG = SystemConfigProvider.getInstance();
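    // Default search timeout of 600000 ms (10 minutes); overridable via the "lucene_search_timeout_ms" property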
    public static final int LUCENE_SEARCH_TIMEOUT_MS = SYSTEM_CONFIG
            .getIntProperty(SYSTEM_CONFIG.getPackageVariableName("lucene_search_timeout_ms"), 600000);

    /**
     * The maximum number of results per page.
     */
    private int maxResults;

    private Directory luceneDirectory;
    private KeyValueStore keyValueStore;
    private Dimension dimension;
    private boolean luceneIndexIsHealthy;
    private IndexSearcher luceneIndexSearcher;
    private int searchTimeout;

    /**
     * Constructor.
     *
     * @param luceneIndexPath  Path to the lucene index files
     * @param maxResults  Maximum number of allowed results in a page
     * @param searchTimeout  Maximum time in milliseconds that a lucene search can run
     */
    public LuceneSearchProvider(String luceneIndexPath, int maxResults, int searchTimeout) {
        this.luceneIndexPath = luceneIndexPath;
        Utils.createParentDirectories(this.luceneIndexPath);

        this.maxResults = maxResults;
        this.searchTimeout = searchTimeout;

        try {
            luceneDirectory = new MMapDirectory(Paths.get(this.luceneIndexPath));
            luceneIndexIsHealthy = true;
        } catch (IOException e) {
            luceneIndexIsHealthy = false;
            String message = ErrorMessageFormat.UNABLE_TO_CREATE_DIR.format(this.luceneIndexPath);
            LOG.error(message, e);
        }
    }

    /**
     * Constructor.  The search timeout is initialized to the default (or configured) value.
     *
     * @param luceneIndexPath  Path to the lucene index files
     * @param maxResults  Maximum number of allowed results in a page
     */
    public LuceneSearchProvider(String luceneIndexPath, int maxResults) {
        this(luceneIndexPath, maxResults, LUCENE_SEARCH_TIMEOUT_MS);
    }

    /**
     * Initializes the `luceneIndexSearcher` if it has not been initialized already.
     * <p>
     * Note that the index searcher cannot be built at construction time, because it needs the dimension and
     * associated key-value store. However, because of a circular dependency between the `SearchProvider` and the
     * `Dimension` classes, we cannot provide the dimension and key-value store to the search provider at
     * construction time.
     */
    private void initializeIndexSearcher() {
        if (luceneIndexSearcher == null) {
            reopenIndexSearcher(true);
        }
    }

    /**
     * Re-open the Index Searcher, opening it for the first time if it's never been opened.
     * <p>
     * This method will attempt to acquire and release a write lock.
     *
     * @param firstTimeThrough  If true, will write an empty index and will then re-open the searcher
     */
    private void reopenIndexSearcher(boolean firstTimeThrough) {
        lock.writeLock().lock();
        try {
            // Close the current reader if open
            if (luceneIndexSearcher != null) {
                luceneIndexSearcher.getIndexReader().close();
            }

            // Open a new IndexSearcher on a new DirectoryReader
            luceneIndexSearcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
        } catch (IOException reopenException) {
            // If there is no index file, this is expected. On the 1st time through, write an empty index and try again
            if (firstTimeThrough) {
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_ANALYZER);
                try (IndexWriter ignored = new IndexWriter(luceneDirectory, indexWriterConfig)) {
                    // Closed automatically by the try-resource block
                } catch (IOException emptyIndexWriteException) {
                    // We can't move past this, so puke
                    luceneIndexIsHealthy = false;
                    String message = String.format("Unable to write empty index to %s:", luceneIndexPath);
                    LOG.error(message, emptyIndexWriteException);
                    throw new RuntimeException(emptyIndexWriteException);
                }
                reopenIndexSearcher(false);
            } else {
                // We've been here before, so puke
                luceneIndexIsHealthy = false;
                String message = String.format("Unable to open index searcher for %s:", luceneIndexPath);
                LOG.error(message, reopenException);
                throw new RuntimeException(reopenException);
            }
        } finally {
            lock.writeLock().unlock();
        }
    }

    @Override
    public void setDimension(Dimension dimension) {
        this.dimension = dimension;
    }

    @Override
    public void setKeyValueStore(KeyValueStore keyValueStore) {
        this.keyValueStore = keyValueStore;
        // Initialize the cardinality key in the keyValueStore if it is not already present
        if (keyValueStore.get(DimensionStoreKeyUtils.getCardinalityKey()) == null) {
            refreshCardinality();
        }
    }

    @Override
    public int getDimensionCardinality() {
        return Integer.parseInt(keyValueStore.getOrDefault(DimensionStoreKeyUtils.getCardinalityKey(), "0"));
    }

    @Override
    public Pagination<DimensionRow> findAllDimensionRowsPaged(PaginationParameters paginationParameters) {
        return getResultsPage(new MatchAllDocsQuery(), paginationParameters);
    }

    @Override
    public TreeSet<DimensionRow> findAllOrderedDimensionRows() {
        return new TreeSet<>(findAllDimensionRows());
    }

    @Override
    public boolean isHealthy() {
        return luceneIndexIsHealthy;
    }

    /**
     * Refresh the Lucene index.
     * <p>
     * Queries the Lucene index on the key column to see whether a document for this row already exists.
     * <p>
     * If it does, the index is updated by first deleting the existing document and then adding the new one.
     * <p>
     * If not, the document is simply added to the index.
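     * <p>
     * A minimal single-row refresh (schematic; assumes the dimension has "id" and "desc" fields and exposes
     * {@code parseDimensionRow} to build the row):
     * <pre>{@code
     * Map<String, String> fieldValues = new HashMap<>();
     * fieldValues.put("id", "US");
     * fieldValues.put("desc", "United States");
     * searchProvider.refreshIndex("US", dimension.parseDimensionRow(fieldValues), null);
     * }</pre>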
     *
     * @param rowId  Key for the id column
     * @param dimensionRow  The new dimension row
     * @param dimensionRowOld  The old dimension row being replaced, if any
     */
    @Override
    public void refreshIndex(String rowId, DimensionRow dimensionRow, DimensionRow dimensionRowOld) {
        refreshIndex(Collections.singletonMap(rowId, new Pair<>(dimensionRow, dimensionRowOld)));
    }

    @Override
    public void refreshIndex(Map<String, Pair<DimensionRow, DimensionRow>> changedRows) {
        // Make a single Document instance to hold field data being updated to Lucene.
        // Creating documents is costly, so the same Document is reused for each record processed,
        // per Lucene performance best practices.
        Document doc = new Document();
        Map<DimensionField, Field> dimFieldToLuceneField = new HashMap<>(dimension.getDimensionFields().size());

        // Create the document fields for this dimension and add them to the document
        for (DimensionField dimensionField : dimension.getDimensionFields()) {
            Field luceneField = new StringField(DimensionStoreKeyUtils.getColumnKey(dimensionField.getName()), "",
                    dimensionField.equals(dimension.getKey()) ? Field.Store.YES : Field.Store.NO);

            // Store the lucene field in the doc and in our lookup map
            dimFieldToLuceneField.put(dimensionField, luceneField);
            doc.add(luceneField);
        }

        // Write the rows to the document
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_ANALYZER)
                .setRAMBufferSizeMB(BUFFER_SIZE);
        lock.writeLock().lock();
        try {
            try (IndexWriter luceneIndexWriter = new IndexWriter(luceneDirectory, indexWriterConfig)) {
                // Update the document fields for each row and update the document
                for (String rowId : changedRows.keySet()) {
                    // Get the new row from the pair
                    DimensionRow newDimensionRow = changedRows.get(rowId).getKey();

                    // Update the index
                    updateDimensionRow(doc, dimFieldToLuceneField, luceneIndexWriter, newDimensionRow);
                }

            } catch (IOException e) {
                luceneIndexIsHealthy = false;
                LOG.error("Failed to refresh index for dimension rows", e);
                throw new RuntimeException(e);
            }
            // All changes are committed to the index when the IndexWriter is closed by the try-with-resources block.
            //This must be outside the try-resources block because it may _also_ need to open an IndexWriter, and
            //opening an IndexWriter involves taking a write lock on lucene, of which there can only be one at a time.
            reopenIndexSearcher(true);
            refreshCardinality();
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Update the dimension row in the index.
     *
     * @param luceneDimensionRowDoc  Document to use for doing the update
     * @param fieldMap  Mapping of DimensionFields to the Document's fields
     * @param writer  Lucene IndexWriter to update the indexes of
     * @param newRow  Row to update
     *
     * @throws IOException if there is a problem updating the document
     */
    private void updateDimensionRow(Document luceneDimensionRowDoc, Map<DimensionField, Field> fieldMap,
            IndexWriter writer, DimensionRow newRow) throws IOException {
        // Update the document fields with each field from the new dimension row
        for (DimensionField field : dimension.getDimensionFields()) {
            // Get the field to update from the lookup map
            Field fieldToUpdate = fieldMap.get(field);

            // Set field value to updated value
            fieldToUpdate.setStringValue(newRow.getOrDefault(field, ""));
        }

        // Build the term to delete the old document by the key value (which should be unique)
        Term keyTerm = new Term(fieldMap.get(dimension.getKey()).name(),
                newRow.getOrDefault(dimension.getKey(), ""));

        // Update the document by the key term
        writer.updateDocument(keyTerm, luceneDimensionRowDoc);
    }

    @Override
    public void replaceIndex(String newLuceneIndexPathString) {
        LOG.debug("Replacing Lucene indexes at {} for dimension {} with new index at {}",
                luceneDirectory.toString(), dimension.getApiName(), newLuceneIndexPathString);

        lock.writeLock().lock();
        try {
            Path oldLuceneIndexPath = Paths.get(luceneIndexPath);
            String tempDir = oldLuceneIndexPath.resolveSibling(oldLuceneIndexPath.getFileName() + "_old")
                    .toString();

            LOG.trace("Moving old Lucene index directory from {} to {} ...", luceneIndexPath, tempDir);
            moveDirEntries(luceneIndexPath, tempDir);

            LOG.trace("Moving all new Lucene indexes from {} to {} ...", newLuceneIndexPathString, luceneIndexPath);
            moveDirEntries(newLuceneIndexPathString, luceneIndexPath);

            LOG.trace("Deleting {} since new Lucene indexes have been moved away from there and is now empty",
                    newLuceneIndexPathString);
            deleteDir(newLuceneIndexPathString);

            LOG.trace("Deleting old Lucene indexes in {} ...", tempDir);
            deleteDir(tempDir);

            reopenIndexSearcher(false);
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Moves all files and sub-directories from one location to another.
     * <p>
     * The source location must exist before calling this method; the destination is created if it does not.
     *
     * @param sourceDir  The location where files and sub-directories will be moved from
     * @param destinationDir  The location where files and sub-directories will be moved to
     */
    private static void moveDirEntries(String sourceDir, String destinationDir) {
        Path sourcePath = Paths.get(sourceDir).toAbsolutePath();
        Path destinationPath = Paths.get(destinationDir).toAbsolutePath();

        if (!Files.exists(destinationPath)) {
            try {
                Files.createDirectory(destinationPath);
            } catch (IOException e) {
                LOG.error(ErrorMessageFormat.UNABLE_TO_CREATE_DIR.format(destinationDir), e);
                throw new RuntimeException(e);
            }
        }

        try {
            Files.walkFileTree(sourcePath, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes basicFileAttributes)
                        throws IOException {
                    Path destinationDirPath = destinationPath.resolve(sourcePath.relativize(dir));
                    if (!Files.exists(destinationDirPath)) {
                        Files.createDirectory(destinationDirPath);
                        LOG.trace("Creating sub-directory {} under {} ...", dir, destinationDir);
                    }
                    return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes basicFileAttributes)
                        throws IOException {
                    Path destinationFileName = destinationPath.resolve(sourcePath.relativize(file));
                    LOG.trace("Moving {} to {}", file, destinationFileName);
                    Files.move(file, destinationFileName);
                    return FileVisitResult.CONTINUE;
                }
            });
        } catch (IOException e) {
            LOG.error("I/O error thrown by SimpleFileVisitor method");
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes a directory and all entries under that directory.
     *
     * @param path  The location of the directory that is to be deleted
     */
    private static void deleteDir(String path) {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            String message = ErrorMessageFormat.UNABLE_TO_DELETE_DIR.format(path);
            LOG.error(message, e);
            throw new RuntimeException(message, e);
        }
    }

    /**
     * Clears the dimension cache, and resets the indices, effectively resetting the SearchProvider to a clean state.
     * <p>
     * Note that this method attempts to acquire a write lock before clearing the index.
     */
    @Override
    public void clearDimension() {
        Set<DimensionRow> dimensionRows = findAllDimensionRows();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_ANALYZER)
                .setRAMBufferSizeMB(BUFFER_SIZE);
        lock.writeLock().lock();
        try {
            try (IndexWriter writer = new IndexWriter(luceneDirectory, indexWriterConfig)) {
                //Remove all dimension data from the store.
                String rowId = dimension.getKey().getName();
                dimensionRows.stream().map(DimensionRow::getRowMap).map(map -> map.get(rowId))
                        .map(id -> DimensionStoreKeyUtils.getRowKey(rowId, id)).forEach(keyValueStore::remove);

                //Since Lucene's indices are being dropped, the dimension field stored via the columnKey is becoming
                //stale.
                keyValueStore.remove(DimensionStoreKeyUtils.getColumnKey(dimension.getKey().getName()));
                //The allValues key mapping needs to reflect the fact that we are dropping all dimension data.
                keyValueStore.put(DimensionStoreKeyUtils.getAllValuesKey(), "[]");
                //We're resetting the keyValueStore, so we don't want any stale last updated date floating around.
                keyValueStore.remove(DimensionStoreKeyUtils.getLastUpdatedKey());

                //In addition to clearing the keyValueStore, we also need to delete all of Lucene's segment files.
                writer.deleteAll();
                writer.commit();
            } catch (IOException e) {
                LOG.error(ErrorMessageFormat.FAIL_TO_WIPTE_LUCENE_INDEX_DIR.format(luceneDirectory), e);
                throw new RuntimeException(e);
            }

            //This must be outside the try-resources block because it may _also_ need to open an IndexWriter, and
            //opening an IndexWriter involves taking a write lock on lucene, of which there can only be one at a time.
            reopenIndexSearcher(true);
            refreshCardinality();
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Update the cardinality count.
     * <p>
     * Note that this method acquires a read lock to query the lucene index for the number of documents.
     */
    private void refreshCardinality() {
        int numDocs;
        initializeIndexSearcher();
        lock.readLock().lock();
        try {
            numDocs = luceneIndexSearcher.getIndexReader().numDocs();
        } finally {
            lock.readLock().unlock();
        }
        keyValueStore.put(DimensionStoreKeyUtils.getCardinalityKey(), Integer.toString(numDocs));
    }

    @Override
    public Pagination<DimensionRow> findFilteredDimensionRowsPaged(Set<ApiFilter> filters,
            PaginationParameters paginationParameters) {
        return getResultsPage(getFilterQuery(filters), paginationParameters);
    }

    /**
     * Returns a Collector that accumulates boolean queries into a single nested query, and ties all of the sub
     * queries together with the specified {@link BooleanClause.Occur}.
     *
     * @param occur  The Occur operator to tie the boolean queries together with
     *
     * @return A Collector that ties a collection of boolean queries into a single query with the specified
     * occurrence operator
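     * <p>
     * An illustrative use (hypothetical field name and values):
     * <pre>{@code
     * BooleanQuery anyOf = Stream.of(new Term("id", "a"), new Term("id", "b"))
     *         .map(TermQuery::new)
     *         .collect(getBooleanQueryCollector(BooleanClause.Occur.SHOULD))
     *         .build();
     * // anyOf matches documents whose "id" field is exactly "a" or "b"
     * }</pre>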
     */
    private static Collector<Query, BooleanQuery.Builder, BooleanQuery.Builder> getBooleanQueryCollector(
            BooleanClause.Occur occur) {
        return Collector.of(BooleanQuery.Builder::new, (builder, termQuery) -> builder.add(termQuery, occur),
                (accumulator, termQueryBuilder) -> accumulator.add(termQueryBuilder.build(), occur));
    }

    /**
     * Builds the query for an "in" filter operation.
     *
     * @param luceneFieldName  Name of the lucene field to filter on
     * @param filter  New filter to add to the query
     *
     * @return A BooleanQuery that matches documents whose field equals any of the filter's values
     */
    private BooleanQuery inFilterQuery(String luceneFieldName, ApiFilter filter) {
        return filterToTermQueries(luceneFieldName, filter)
                .collect(getBooleanQueryCollector(BooleanClause.Occur.SHOULD)).build();
    }

    /**
     * Given an ApiFilter, returns a stream of term queries, one for each value in the filter.
     *
     * @param luceneFieldName  Name of the lucene field to filter on
     * @param filter  The filter to be turned into term queries
     *
     * @return A stream of term queries
     */
    private Stream<TermQuery> filterToTermQueries(String luceneFieldName, ApiFilter filter) {
        return filter.getValues().stream().map(value -> new Term(luceneFieldName, value)).map(TermQuery::new);
    }

    /**
     * Builds the query for a "startswith" filter operation.
     *
     * @param luceneFieldName  Name of the lucene field to filter on
     * @param filter  New filter to add to the query
     *
     * @return A BooleanQuery that matches documents whose field starts with any of the filter's values
     */
    private BooleanQuery startswithFilterQuery(String luceneFieldName, ApiFilter filter) {
        return filter.getValues().stream().map(value -> new Term(luceneFieldName, value)).map(PrefixQuery::new)
                .collect(getBooleanQueryCollector(BooleanClause.Occur.SHOULD)).build();
    }

    /**
     * Builds the query for a "contains" filter operation.
     *
     * @param luceneFieldName  Name of the lucene field to filter on
     * @param filter  New filter to add to the query
     *
     * @return A BooleanQuery that matches documents whose field contains any of the filter's values
     */
    private BooleanQuery containsFilterQuery(String luceneFieldName, ApiFilter filter) {
        return filter.getValues().stream().map(value -> new Term(luceneFieldName, "*" + value + "*"))
                .map(WildcardQuery::new).collect(getBooleanQueryCollector(BooleanClause.Occur.SHOULD)).build();
    }

    /**
     * Build a Lucene query from a set of API filters.
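     * <p>
     * For example, a pure-negation filter such as {@code location|id-notin[US]} is translated (schematically) into
     * {@code MUST(MatchAllDocsQuery), MUST_NOT(id:US)}, so the negation subtracts from the full universe of rows
     * rather than from an empty result set.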
     *
     * @param filters  The set of filters
     *
     * @return A query to find all the dimension rows that satisfy the given filter
     */
    private Query getFilterQuery(Set<ApiFilter> filters) {
        /*
        Intuitively, Lucene performs searching for each BooleanQuery as follows:
        1. Start with an empty set of results.
        2. Find all the results that MUST and/or SHOULD appear
        3. Remove those entries that MUST_NOT appear
            
        Therefore, if a query has nothing but MUST_NOTs, then we end up removing elements from the
        original empty set of results. This is true even if the query appears in a nested query.
            
        For example, consider the filter:
        property|id-in[sports,finance],location|id-notin[US,Canada],language|id-in[english], which simplifies by
        DeMorgan's Law into
            
        (sports || finance) && (!US && !Canada) && (english)
            
        If we naively translated each parenthesized clause into its own boolean query, and then chained them
        together using MUST, we would always get an empty set of results, because (!US && !Canada) is an
        individual query with nothing but MUST_NOT.
            
        Therefore, we need to guarantee that our negative clauses are not treated as queries. Fortunately,
        by the associativity of conjunction, the above query is equivalent to:
            
        (sports || finance) && !US && !Canada && english, or in Lucene-speak:
            
        MUST(SHOULD(sports), SHOULD(finance)), MUST_NOT(US), MUST_NOT(Canada), MUST(english)
         */

        BooleanQuery.Builder filterQueryBuilder = new BooleanQuery.Builder();
        boolean hasPositive = false;
        for (ApiFilter filter : filters) {
            FilterOperation op = filter.getOperation();
            if (!(op instanceof DefaultFilterOperation)) {
                LOG.error("Illegal Filter operation : {}, only default filter ops supported",
                        filter.getOperation());
                throw new IllegalArgumentException(
                        "Only supports default filter operations: in, notin, startswith, contains, eq");
            }
            DefaultFilterOperation defaultFilterOp = (DefaultFilterOperation) op;

            String luceneFieldName = DimensionStoreKeyUtils.getColumnKey(filter.getDimensionField().getName());
            switch (defaultFilterOp) {
            case eq:
                // fall through on purpose since eq and in have the same functionality
            case in:
                hasPositive = true;
                filterQueryBuilder.add(inFilterQuery(luceneFieldName, filter), BooleanClause.Occur.MUST);
                break;
            case notin:
                //Add each negative clause to the top-level query
                filterToTermQueries(luceneFieldName, filter)
                        .forEach(query -> filterQueryBuilder.add(query, BooleanClause.Occur.MUST_NOT));
                break;
            case startswith:
                filterQueryBuilder.add(startswithFilterQuery(luceneFieldName, filter), BooleanClause.Occur.MUST);
                hasPositive = true;
                break;
            case contains:
                filterQueryBuilder.add(containsFilterQuery(luceneFieldName, filter), BooleanClause.Occur.MUST);
                hasPositive = true;
                break;
            default:
                LOG.debug("Illegal Filter operation : {}", filter.getOperation());
                throw new IllegalArgumentException("Invalid Filter Operation.");
            }
        }

        if (!hasPositive) {
            //If we don't have any positive queries, then we want the entire Universe, except for values that match
            //the negative queries.
            filterQueryBuilder.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
        }

        BooleanQuery booleanQuery = filterQueryBuilder.build();
        LOG.trace("Translated ApiFilters {} into Lucene Query: {}", filters, booleanQuery);
        return booleanQuery;
    }

    /**
     * Returns the desired page of dimension rows found by the specified query with the relevant metadata.
     * <p>
     * Note that this method _may_ need to acquire and release a write lock if the index searcher needs to be
     * initialized, and it later acquires and releases a read lock when querying for dimension data from Lucene.
     *
     * @param query  The Lucene query used to locate the desired DimensionRows
     * @param paginationParameters  The parameters defining the pagination (i.e. the number of rows per page, and the
     * desired page)
     *
     * @return The desired page of dimension rows that satisfy the given query
     *
     * @throws PageNotFoundException if the page requested is past the last page of results
     */
    private Pagination<DimensionRow> getResultsPage(Query query, PaginationParameters paginationParameters)
            throws PageNotFoundException {
        int perPage = paginationParameters.getPerPage();
        validatePerPage(perPage);

        TreeSet<DimensionRow> filteredDimRows;
        int documentCount;
        initializeIndexSearcher();
        LOG.trace("Lucene Query {}", query);

        lock.readLock().lock();
        try {
            ScoreDoc[] hits;
            try (TimedPhase timer = RequestLog.startTiming("QueryingLucene")) {
                TopDocs hitDocs = getPageOfData(luceneIndexSearcher, null, query, perPage);
                hits = hitDocs.scoreDocs;
                documentCount = hitDocs.totalHits;
                int requestedPageNumber = paginationParameters.getPage(documentCount);
                if (hits.length == 0) {
                    if (requestedPageNumber == 1) {
                        return new SinglePagePagination<>(Collections.emptyList(), paginationParameters, 0);
                    } else {
                        throw new PageNotFoundException(requestedPageNumber, perPage, 0);
                    }
                }
                for (int currentPage = 1; currentPage < requestedPageNumber; currentPage++) {
                    ScoreDoc lastEntry = hits[hits.length - 1];
                    hits = getPageOfData(luceneIndexSearcher, lastEntry, query, perPage).scoreDocs;
                    if (hits.length == 0) {
                        throw new PageNotFoundException(requestedPageNumber, perPage, 0);
                    }
                }
            }

            // convert hits to dimension rows
            try (TimedPhase timer = RequestLog.startTiming("LuceneHydratingDimensionRows")) {
                String idKey = DimensionStoreKeyUtils.getColumnKey(dimension.getKey().getName());
                filteredDimRows = Arrays.stream(hits).map(hit -> {
                    try {
                        return luceneIndexSearcher.doc(hit.doc);
                    } catch (IOException e) {
                        LOG.error("Unable to convert hit " + hit);
                        throw new RuntimeException(e);
                    }
                }).map(document -> document.get(idKey)).map(dimension::findDimensionRowByKeyValue)
                        .collect(Collectors.toCollection(TreeSet::new));
            }
        } finally {
            lock.readLock().unlock();
        }
        return new SinglePagePagination<>(
                Collections.unmodifiableList(filteredDimRows.stream().collect(Collectors.toList())),
                paginationParameters, documentCount);
    }

    /**
     * Check whether perPage exceeds the maximum number of rows that may be returned.
     *
     * @param perPage  The number of entries per page
     */
    private void validatePerPage(int perPage) {
        if (perPage > maxResults) {
            String msg = String.format("Number of rows requested exceeds request limit of %d", maxResults);
            throw new RowLimitReachedException(msg);
        }
    }

    /**
     * Returns the requested page of dimension metadata from Lucene.
     * <p>
     * Note that this method acquires and releases a read lock when querying Lucene for data.
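     * <p>
     * A schematic walk across two pages (hypothetical page size; mirrors the paging loop in
     * {@code getResultsPage}):
     * <pre>{@code
     * TopDocs first = getPageOfData(searcher, null, query, 25);       // rows 1-25
     * ScoreDoc lastSeen = first.scoreDocs[first.scoreDocs.length - 1];
     * TopDocs second = getPageOfData(searcher, lastSeen, query, 25);  // rows 26-50, searching after lastSeen
     * }</pre>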
     *
     * @param indexSearcher  The service to find the desired dimension metadata in the Lucene index
     * @param lastEntry  The last entry from the previous page of dimension metadata, the indexSearcher will begin its
     * search after this entry (if lastEntry is null, the indexSearcher will begin its search from the beginning)
     * @param query  The Lucene query used to locate the desired dimension metadata
     * @param perPage  The number of entries per page
     *
     * @return The desired page of dimension metadata
     */
    private TopDocs getPageOfData(IndexSearcher indexSearcher, ScoreDoc lastEntry, Query query, int perPage) {
        TimeLimitingCollectorManager manager = new TimeLimitingCollectorManager(searchTimeout, lastEntry, perPage);
        lock.readLock().lock();
        try {
            return indexSearcher.search(query, manager);
        } catch (IOException e) {
            String errorMessage = "Unable to find dimension rows for specified page.";
            LOG.error(errorMessage, e);
            throw new RuntimeException(errorMessage, e);
        } catch (TimeLimitingCollector.TimeExceededException e) {
            LOG.warn("Lucene query timeout: {}. {}", query, e.getMessage());
            throw new TimeoutException(e.getMessage(), e);
        } finally {
            lock.readLock().unlock();
        }
    }
}