org.modeshape.jcr.index.lucene.Searcher.java Source code

Java tutorial

Introduction

Here is the source code for org.modeshape.jcr.index.lucene.Searcher.java

Source

/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.jcr.index.lucene;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import javax.jcr.query.qom.Constraint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LRUQueryCache;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.Bits;
import org.modeshape.common.annotation.Immutable;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.common.logging.Logger;
import org.modeshape.common.util.NamedThreadFactory;
import org.modeshape.jcr.cache.NodeKey;
import org.modeshape.jcr.index.lucene.query.LuceneQueryFactory;
import org.modeshape.jcr.spi.index.IndexConstraints;
import org.modeshape.jcr.spi.index.provider.Filter;

/**
 * Class which handles the actual Lucene searching for the {@link LuceneIndexProvider}
 * 
 * @author Horia Chiorean (hchiorea@redhat.com)
 * @since 4.5
 */
@Immutable
@ThreadSafe
public class Searcher {
    // the implicit score that will be used when no explicit scoring is requested
    protected static final float DEFAULT_SCORE = 1.0f;

    private static final Logger LOGGER = Logger.getLogger(Searcher.class);
    private static final int MAX_QUERIES_TO_CACHE = 200;
    private static final long MAX_RAM_BYTES_TO_USE = 50 * 1024L * 1024L;

    private static final Set<String> ID_FIELD_SET = Collections.singleton(FieldUtil.ID);

    private final SearcherManager searchManager;
    private final ScheduledExecutorService searchManagerRefreshService;
    private final ScheduledFuture<?> searchManagerRefreshResult;
    private final QueryCache queryCache;

    protected Searcher(LuceneConfig config, IndexWriter writer, String name) {
        this.searchManager = config.searchManager(writer);
        this.queryCache = new LRUQueryCache(MAX_QUERIES_TO_CACHE, MAX_RAM_BYTES_TO_USE);
        this.searchManagerRefreshService = Executors.newScheduledThreadPool(1,
                new NamedThreadFactory(name + "-lucene-search-manager-refresher"));
        this.searchManagerRefreshResult = this.searchManagerRefreshService.scheduleWithFixedDelay(
                this::refreshSearchManager, 0, config.refreshTimeSeconds(), TimeUnit.SECONDS);
    }

    protected void close() {
        try {
            searchManagerRefreshResult.cancel(false);
            searchManagerRefreshService.shutdown();
            searchManager.close();
        } catch (IOException e) {
            LOGGER.warn(e, LuceneIndexProviderI18n.warnErrorWhileClosingSearcher);
        }
    }

    protected Filter.Results filter(IndexConstraints indexConstraints, LuceneQueryFactory queryFactory,
            long cardinalityEstimate) {
        Query query = createQueryFromConstraints(indexConstraints.getConstraints(), queryFactory);
        return new LuceneResults(query, queryFactory.scoreDocuments(), cardinalityEstimate);
    }

    protected long estimateCardinality(final List<Constraint> andedConstraints,
            final LuceneQueryFactory queryFactory) throws IOException {
        return search(searcher -> {
            Query query = createQueryFromConstraints(andedConstraints, queryFactory);
            return (long) searcher.count(query);
        }, true);
    }

    protected Document loadDocumentById(final String id) throws IOException {
        // this is a potentially costly operation
        return search(searcher -> {
            DocumentByIdCollector collector = new DocumentByIdCollector();
            searcher.search(FieldUtil.idQuery(id), collector);
            return collector.document();
        }, true);
    }

    private Query createQueryFromConstraints(Collection<Constraint> andedConstraints,
            LuceneQueryFactory queryFactory) {
        if (andedConstraints.isEmpty()) {
            // if there are no anded constraint but this index was called to filter results, simply return everything...
            return new MatchAllDocsQuery();
        } else if (andedConstraints.size() == 1) {
            return queryFactory.createQuery(andedConstraints.iterator().next());
        } else {
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.setDisableCoord(true);
            for (Constraint constraint : andedConstraints) {
                builder.add(queryFactory.createQuery(constraint), BooleanClause.Occur.MUST);
            }
            return builder.build();
        }
    }

    protected void refreshSearchManager() {
        try {
            searchManager.maybeRefreshBlocking();
        } catch (InterruptedIOException ie) {
            Thread.currentThread().interrupt();
        } catch (IOException e) {
            LOGGER.warn(e, LuceneIndexProviderI18n.warnErrorWhileClosingSearcher);
        }
    }

    protected <T> T search(Searchable<T> searchable, boolean refreshReader) {
        if (refreshReader) {
            refreshSearchManager();
        }
        IndexSearcher searcher = null;
        try {
            searcher = searchManager.acquire();
            searcher.setQueryCache(queryCache);
            return searchable.search(searcher);
        } catch (IOException e) {
            throw new LuceneIndexException(e);
        } finally {
            if (searcher != null) {
                try {
                    searchManager.release(searcher);
                } catch (IOException e) {
                    LOGGER.debug(e, "Cannot release Lucene searcher");
                }
            }
        }
    }

    private class LuceneResults implements Filter.Results {

        private final boolean scoreDocuments;
        private final long size;

        private Query query;
        private Iterator<NodeKey> keysIterator;
        private Iterator<Float> scoresIterator;
        private int currentBatch;

        protected LuceneResults(Query query, boolean scoreDocuments, long size) {
            this.scoreDocuments = scoreDocuments;
            this.query = query;
            this.currentBatch = 0;
            this.size = size;
        }

        @Override
        public Filter.ResultBatch getNextBatch(final int batchSize) {
            int startPosition = currentBatch++ * batchSize;
            int endPosition = (int) Math.min(size, startPosition + batchSize);
            boolean hasNextBatch = endPosition != size;
            int size = endPosition - startPosition;
            return new Filter.ResultBatch() {
                private int keysCount = 0;
                private int scoresCount = 0;

                @Override
                public Iterable<NodeKey> keys() {
                    return () -> new Iterator<NodeKey>() {
                        @Override
                        public boolean hasNext() {
                            if (keysCount == size) {
                                return false;
                            }
                            if (keysIterator == null) {
                                runQuery();
                            }
                            return keysIterator.hasNext();
                        }

                        @Override
                        public NodeKey next() {
                            if (keysCount++ == size) {
                                throw new NoSuchElementException();
                            }
                            if (keysIterator == null) {
                                runQuery();
                            }
                            return keysIterator.next();
                        }
                    };
                }

                @Override
                public Iterable<Float> scores() {
                    return () -> new Iterator<Float>() {
                        @Override
                        public boolean hasNext() {
                            if (scoresCount == size) {
                                return false;
                            }
                            if (scoresIterator == null) {
                                runQuery();
                            }
                            return scoresIterator.hasNext();
                        }

                        @Override
                        public Float next() {
                            if (scoresCount++ == size) {
                                throw new NoSuchElementException();
                            }
                            if (scoresIterator == null) {
                                runQuery();
                            }
                            return scoresIterator.next();
                        }
                    };
                }

                @Override
                public boolean hasNext() {
                    return hasNextBatch;
                }

                @Override
                public int size() {
                    return size;
                }

                private void runQuery() {
                    if (keysIterator == null && scoresIterator == null) {
                        Map<NodeKey, Float> results = search(searcher -> getSearchResults(searcher), true);
                        keysIterator = results.keySet().iterator();
                        scoresIterator = results.values().iterator();
                    }
                }
            };
        }

        private Map<NodeKey, Float> getSearchResults(IndexSearcher searcher) throws IOException {
            IdsCollector collector = new IdsCollector(scoreDocuments, searcher.getIndexReader().maxDoc());
            searcher.search(query, collector);
            BitSet docIds = collector.documents();
            Map<NodeKey, Float> results = new LinkedHashMap<>();
            for (int i = docIds.nextSetBit(0); i >= 0; i = docIds.nextSetBit(i + 1)) {
                try {
                    // this is a valid document which we have to load...
                    Document document = searcher.doc(i, ID_FIELD_SET);
                    String id = document.getBinaryValue(FieldUtil.ID).utf8ToString();
                    Float score = collector.scoreFor(i);
                    results.put(new NodeKey(id), score);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return results;
        }

        @Override
        public void close() {
            keysIterator = null;
            scoresIterator = null;
            query = null;
        }

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder(query.toString());
            sb.append("=").append("[").append(size).append(" keys]");
            return sb.toString();
        }
    }

    private static class IdsCollector extends SimpleCollector {

        private final float[] scores;

        private BitSet docHits;
        private Scorer scorer;
        private int docBase;
        private Bits liveDocs;

        protected IdsCollector(boolean scoring, int maxDoc) {
            this.scores = scoring ? new float[maxDoc] : null;
            this.docHits = new BitSet(maxDoc);
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            this.docBase = context.docBase;
            this.liveDocs = context.reader().getLiveDocs();
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            if (isScoring()) {
                this.scorer = scorer;
            }
        }

        @Override
        public void collect(int doc) throws IOException {
            if (liveDocs != null && !liveDocs.get(doc)) {
                // 'doc' has been deleted, so ignore it
                return;
            }
            int docId = doc + docBase;
            if (isScoring()) {
                scores[docId] = scorer.score();
            }
            docHits.set(docId);
        }

        @Override
        public boolean needsScores() {
            return isScoring();
        }

        protected BitSet documents() {
            return docHits;
        }

        protected Float scoreFor(int docId) {
            return isScoring() ? scores[docId] : DEFAULT_SCORE;
        }

        private boolean isScoring() {
            return scores != null;
        }
    }

    @FunctionalInterface
    protected interface Searchable<T> {
        T search(IndexSearcher searcher) throws IOException;
    }

    private static class DocumentByIdCollector extends SimpleCollector {

        private LeafReader currentReader;
        private Document document;

        @Override
        public void collect(int doc) throws IOException {
            if (document == null) {
                document = currentReader.document(doc);
            } else {
                throw new CollectionTerminatedException();
            }
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            if (document != null) {
                // we already found our document, so terminate
                throw new CollectionTerminatedException();
            }
            currentReader = context.reader();
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        protected Document document() {
            return document;
        }
    }
}