org.apache.lucene.index.ParallelCompositeReader.java Source code

Introduction

Here is the source code for org.apache.lucene.index.ParallelCompositeReader.java. A short usage sketch follows the listing.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;

/** A {@link CompositeReader} which reads multiple, parallel indexes.  Each
 * index added must have the same number of documents, and exactly the same
 * number of leaves (with equal {@code maxDoc}), but typically each contains
 * different fields. Deletions are taken from the first reader. Each document
 * contains the union of the fields of all documents with the same document
 * number.  When searching, matches for a query term are from the first index
 * added that has the field.
 *
 * <p>This is useful, e.g., with collections that have large fields which
 * change rarely and small fields that change more frequently.  The smaller
 * fields may be re-indexed in a new index and both indexes may be searched
 * together.
 * 
 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
 * are created and modified the same way. For example, if you add
 * documents to one index, you need to add the same documents in the
 * same order to the other indexes. <em>Failure to do so will result in
 * undefined behavior</em>.
 * A good strategy to create suitable indexes with {@link IndexWriter} is to use
 * {@link LogDocMergePolicy}, as it does not reorder documents
 * during merging (unlike {@code TieredMergePolicy}) and triggers merges
 * by the number of documents per segment. If you use a different {@link MergePolicy},
 * the segment structure of your index may no longer be predictable.
 */
public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
    private final boolean closeSubReaders;
    private final Set<IndexReader> completeReaderSet = Collections
            .newSetFromMap(new IdentityHashMap<IndexReader, Boolean>());
    private final CacheHelper cacheHelper;

    /** Create a ParallelCompositeReader based on the provided
     *  readers; auto-closes the given readers on {@link #close()}. */
    public ParallelCompositeReader(CompositeReader... readers) throws IOException {
        this(true, readers);
    }

    /** Create a ParallelCompositeReader based on the provided
     *  readers; closes the given readers on {@link #close()} only if
     *  {@code closeSubReaders} is {@code true}. */
    public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) throws IOException {
        this(closeSubReaders, readers, readers);
    }

    /** Expert: create a ParallelCompositeReader based on the provided
     *  readers and storedFieldReaders; when a document is
     *  loaded, only the storedFieldReaders will be used. */
    public ParallelCompositeReader(boolean closeSubReaders, CompositeReader[] readers,
            CompositeReader[] storedFieldReaders) throws IOException {
        super(prepareLeafReaders(readers, storedFieldReaders));
        this.closeSubReaders = closeSubReaders;
        Collections.addAll(completeReaderSet, readers);
        Collections.addAll(completeReaderSet, storedFieldReaders);
        // update ref-counts (like MultiReader):
        if (!closeSubReaders) {
            for (final IndexReader reader : completeReaderSet) {
                reader.incRef();
            }
        }
        // finally add our own synthetic readers, so we close or decRef them, too (either works, as their doClose() is a no-op)
        completeReaderSet.addAll(getSequentialSubReaders());
        // ParallelReader instances can be short-lived, which would make caching trappy,
        // so we do not cache on them unless they wrap a single reader, in which
        // case we delegate to it:
        if (readers.length == 1 && storedFieldReaders.length == 1 && readers[0] == storedFieldReaders[0]) {
            cacheHelper = readers[0].getReaderCacheHelper();
        } else {
            cacheHelper = null;
        }
    }

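    /** Flattens each composite reader into its leaves, validates that all readers
     *  (and storedFieldsReaders) line up with the first reader's segment structure,
     *  and zips the i-th leaf of every reader into one synthetic {@link ParallelLeafReader}. */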
    private static LeafReader[] prepareLeafReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders)
            throws IOException {
        if (readers.length == 0) {
            if (storedFieldsReaders.length > 0)
                throw new IllegalArgumentException(
                        "There must be at least one main reader if storedFieldsReaders are used.");
            return new LeafReader[0];
        } else {
            final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();

            // check compatibility:
            final int maxDoc = readers[0].maxDoc(), noLeaves = firstLeaves.size();
            final int[] leafMaxDoc = new int[noLeaves];
            for (int i = 0; i < noLeaves; i++) {
                final LeafReader r = firstLeaves.get(i).reader();
                leafMaxDoc[i] = r.maxDoc();
            }
            validate(readers, maxDoc, leafMaxDoc);
            validate(storedFieldsReaders, maxDoc, leafMaxDoc);

            // flatten structure of each Composite to just LeafReader[]
            // and combine parallel structure with ParallelLeafReaders:
            final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
            for (int i = 0; i < wrappedLeaves.length; i++) {
                final LeafReader[] subs = new LeafReader[readers.length];
                for (int j = 0; j < readers.length; j++) {
                    subs[j] = readers[j].leaves().get(i).reader();
                }
                final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
                for (int j = 0; j < storedFieldsReaders.length; j++) {
                    storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
                }
                // We pass true for closeSubs and override doClose() below to be a no-op:
                // this keeps the synthetic throw-away readers used here completely invisible to ref-counting
                wrappedLeaves[i] = new ParallelLeafReader(true, subs, storedSubs) {
                    @Override
                    protected void doClose() {
                    }
                };
            }
            return wrappedLeaves;
        }
    }

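    /** Checks that every reader has the given {@code maxDoc} and that each of its
     *  leaves has the same {@code maxDoc} as the corresponding leaf of the first reader. */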
    private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
        for (int i = 0; i < readers.length; i++) {
            final CompositeReader reader = readers[i];
            final List<? extends LeafReaderContext> subs = reader.leaves();
            if (reader.maxDoc() != maxDoc) {
                throw new IllegalArgumentException(
                        "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc());
            }
            final int noSubs = subs.size();
            if (noSubs != leafMaxDoc.length) {
                throw new IllegalArgumentException("All readers must have same number of leaf readers");
            }
            for (int subIDX = 0; subIDX < noSubs; subIDX++) {
                final LeafReader r = subs.get(subIDX).reader();
                if (r.maxDoc() != leafMaxDoc[subIDX]) {
                    throw new IllegalArgumentException(
                            "All leaf readers must have same corresponding subReader maxDoc");
                }
            }
        }
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
        return cacheHelper;
    }

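    // Closes (or, if closeSubReaders is false, decRefs) each tracked reader exactly once;
    // the first IOException is remembered and rethrown after all readers were handled.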
    @Override
    protected synchronized void doClose() throws IOException {
        IOException ioe = null;
        for (final IndexReader reader : completeReaderSet) {
            try {
                if (closeSubReaders) {
                    reader.close();
                } else {
                    reader.decRef();
                }
            } catch (IOException e) {
                if (ioe == null)
                    ioe = e;
            }
        }
        // throw the first exception
        if (ioe != null)
            throw ioe;
    }
}
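
Usage example

The class javadoc above requires that all parallel indexes share an identical segment structure. What follows is a minimal, self-contained sketch (not part of the Lucene distribution) of one way to satisfy that: both indexes are written with LogDocMergePolicy and receive the same documents in the same order, then are combined for searching. The index paths, field names, document content, and query are illustrative assumptions.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.ParallelCompositeReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ParallelCompositeReaderExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical paths: one index for the large, rarely changing field,
        // one for the small, frequently re-indexed field.
        Directory bigDir = FSDirectory.open(Paths.get("index-big"));
        Directory smallDir = FSDirectory.open(Paths.get("index-small"));

        // Write the same single document to both indexes, each index carrying
        // a different field. LogDocMergePolicy keeps the segment structure
        // predictable, as recommended by the class javadoc.
        indexOneDoc(bigDir, "body", "hello world from the big index");
        indexOneDoc(smallDir, "title", "hello");

        try (DirectoryReader big = DirectoryReader.open(bigDir);
             DirectoryReader small = DirectoryReader.open(smallDir)) {
            // closeSubReaders=false: the try-with-resources block above stays
            // responsible for closing the DirectoryReaders.
            ParallelCompositeReader parallel = new ParallelCompositeReader(false, big, small);
            IndexSearcher searcher = new IndexSearcher(parallel);
            // The field "title" exists only in the small index, but the
            // parallel reader exposes the union of both indexes' fields.
            TopDocs hits = searcher.search(new TermQuery(new Term("title", "hello")), 10);
            System.out.println("hits: " + hits.totalHits);
            parallel.close(); // decRefs the subreaders since closeSubReaders=false
        }
    }

    private static void indexOneDoc(Directory dir, String field, String text) throws Exception {
        IndexWriterConfig cfg = new IndexWriterConfig(new StandardAnalyzer());
        cfg.setMergePolicy(new LogDocMergePolicy());
        try (IndexWriter writer = new IndexWriter(dir, cfg)) {
            Document doc = new Document();
            doc.add(new TextField(field, text, Store.NO));
            writer.addDocument(doc);
        }
    }
}

The same pattern scales to many documents, as long as both writers receive identical addDocument calls in identical order; any divergence breaks the alignment that prepareLeafReaders validates.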