com.lucid.solr.sidecar.SidecarIndexReaderFactory.java Source code

Introduction

Here is the source code for com.lucid.solr.sidecar.SidecarIndexReaderFactory.java. The class is a Solr IndexReaderFactory that builds a secondary "sidecar" index from a separate source collection and joins it to the main index through a ParallelCompositeReader, so that per-document boost data can be read alongside the regular fields.
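
Before the full listing, here is a minimal sketch of how the factory's init-time parameters fit together. It builds by hand the kind of NamedList of arguments that Solr would normally pass in from an indexReaderFactory entry in solrconfig.xml; the parameter names are taken from the init() method in the listing below, while the values (and the "boost-data" collection name) are purely illustrative assumptions.

import com.lucid.solr.sidecar.SidecarIndexReaderFactory;
import org.apache.solr.common.util.NamedList;

public class SidecarIndexReaderFactoryExample {
    public static void main(String[] args) {
        // Illustrative values only; in a real deployment these come from solrconfig.xml.
        NamedList<Object> config = new NamedList<Object>();
        config.add("enabled", Boolean.TRUE);          // turn the sidecar index on
        config.add("sourceCollection", "boost-data"); // hypothetical collection holding the boost documents
        config.add("docIdField", "id");               // field used to match main-index docs to boost docs
        config.add("boostField", "boost");            // field in the source collection carrying boost values
        config.add("mode", "total");                  // one of: max, total, none (see the Mode enum)

        SidecarIndexReaderFactory factory = new SidecarIndexReaderFactory();
        factory.init(config);

        System.out.println("enabled=" + factory.isEnabled()
                + ", source=" + factory.getSourceCollection()
                + ", boostField=" + factory.getBoostField());
    }
}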

Source

package com.lucid.solr.sidecar;
/*
 * Copyright 2013 LucidWorks Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.ParallelCompositeReader;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.IndexReaderFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.StandardIndexReaderFactory;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

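/**
 * An IndexReaderFactory that builds a secondary "sidecar" index from a separate
 * source collection and exposes it together with the main index through a
 * SidecarIndexReader. The sidecar index mirrors the main index document for
 * document (dummy documents stand in for deletions and for documents without
 * matching data), so the two can be combined via a ParallelCompositeReader.
 */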
public class SidecarIndexReaderFactory extends IndexReaderFactory {
    // XXX nocommit
    public static String DEFAULT_DATA_PATH = ".";

    private static final Logger LOG = LoggerFactory.getLogger(SidecarIndexReaderFactory.class);

    public enum Mode {
        /** Normalize against max boost value. */
        max,
        /** Normalize against a sum of all aggregated boost values, multiply by 100. */
        total,
        /** No normalization, use raw boost values. */
        none
    }

    StandardIndexReaderFactory standardFactory;
    String sidecarIndexLocation = "sidecar-index";
    String sourceCollection;
    String boostField;
    String docIdField;
    File sidecarIndex;
    boolean enabled;
    Mode mode = Mode.max;
    float multiplier;
    SolrCore currentCore;
    Set<String> parallelFields = new HashSet<String>();

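    /**
     * Reads the factory configuration. Recognized arguments: docIdField,
     * sourceCollection, boostField, enabled (Boolean) and mode (the name of a
     * Mode constant; defaults to max).
     */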
    @Override
    public void init(NamedList args) {
        super.init(args);
        docIdField = (String) args.get("docIdField");
        sourceCollection = (String) args.get("sourceCollection");
        boostField = (String) args.get("boostField");
        // default to disabled if the flag is absent, instead of failing on unboxing null
        Boolean enabledArg = (Boolean) args.get("enabled");
        enabled = enabledArg != null && enabledArg.booleanValue();
        //LOG.info("CIRF INIT called, enabled=" + enabled);
        String modeString = (String) args.get("mode");
        if (modeString != null) {
            try {
                mode = Mode.valueOf(modeString);
            } catch (Exception e) {
                LOG.warn("Invalid mode '" + modeString + "', using default: " + mode);
            }
        }
        if (mode == Mode.total) {
            multiplier = 100.0f;
        } else {
            multiplier = 1.0f;
        }
        standardFactory = new StandardIndexReaderFactory();
        standardFactory.init(args);
    }

    // for testing
    public void init(String docIdField, String sourceCollection, String boostField, Mode mode) {
        this.docIdField = docIdField;
        this.sourceCollection = sourceCollection;
        this.boostField = boostField;
        this.enabled = true;
        standardFactory = new StandardIndexReaderFactory();
        this.mode = mode;
        if (mode == Mode.total) {
            multiplier = 100.0f;
        } else {
            multiplier = 1.0f;
        }
    }

    @Override
    public DirectoryReader newReader(IndexWriter writer, SolrCore core) throws IOException {
        return newReaderInternal(null, writer, core);
    }

    @Override
    public DirectoryReader newReader(Directory indexDir, SolrCore core) throws IOException {
        return newReaderInternal(indexDir, null, core);
    }

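    /**
     * Opens the main reader through the standard factory and, if the sidecar is
     * enabled and the source collection is available, wraps it together with a
     * freshly built parallel sidecar index. When the source collection is missing
     * or closed, it falls back to a SidecarIndexReader over the main index alone
     * (or to the plain main reader if even that fails).
     */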
    DirectoryReader newReaderInternal(Directory indexDir, IndexWriter writer, SolrCore core) throws IOException {
        DirectoryReader main = null;
        if (writer != null) {
            main = standardFactory.newReader(writer, core);
        } else {
            main = standardFactory.newReader(indexDir, core);
        }
        if (!enabled) {
            LOG.info("Sidecar index not enabled");
            return main;
        }
        currentCore = core;
        CoreContainer container = core.getCoreDescriptor().getCoreContainer();
        SolrCore source = container.getCore(sourceCollection);
        if (source == null) {
            LOG.info("Source collection '" + sourceCollection + "' not present, sidecar index is disabled.");
            try {
                return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                        sourceCollection, null);
            } catch (Exception e1) {
                LOG.warn("Unexpected exception, returning single main index", e1);
                return main;
            }
        }
        if (source.isClosed()) {
            LOG.info("Source collection '" + sourceCollection + "' is closed, sidecar index is disabled.");
            try {
                return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                        sourceCollection, null);
            } catch (Exception e1) {
                LOG.warn("Unexpected exception, returning single main index", e1);
                return main;
            }
        }
        DirectoryReader parallel = null;
        SolrIndexSearcher searcher = null;
        try {
            searcher = source.getNewestSearcher(true).get();
            parallel = buildParallelReader(main, searcher, true);
        } finally {
            if (searcher != null) {
                LOG.info("-- closing " + searcher);
                searcher.close();
            }
            source.close();
        }
        return parallel;
    }

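    /**
     * Rebuilds the parallel view around an already reopened main reader, for
     * example after a commit. When rebuild is true the existing sidecar index
     * files are deleted first and the sidecar is recreated from scratch.
     */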
    DirectoryReader reopen(DirectoryReader newMain, boolean rebuild) throws IOException {
        CoreContainer container = currentCore.getCoreDescriptor().getCoreContainer();
        SolrCore source = container.getCore(sourceCollection);
        if (source == null) {
            LOG.info("Source collection '" + sourceCollection + "' not present, sidecar index is disabled.");
            try {
                return new SidecarIndexReader(this, newMain, null,
                        SidecarIndexReader.getSequentialSubReaders(newMain), sourceCollection, null);
            } catch (Exception e1) {
                LOG.warn("Unexpected exception, returning single main index", e1);
                return newMain;
            }
        }
        if (source.isClosed()) {
            LOG.info("Source collection '" + sourceCollection + "' is closed, sidecar index is disabled.");
            try {
                return new SidecarIndexReader(this, newMain, null,
                        SidecarIndexReader.getSequentialSubReaders(newMain), sourceCollection, null);
            } catch (Exception e1) {
                LOG.warn("Unexpected exception, returning single main index", e1);
                return newMain;
            }
        }
        DirectoryReader parallel = null;
        SolrIndexSearcher searcher = null;
        try {
            searcher = source.getNewestSearcher(true).get();
            parallel = buildParallelReader(newMain, searcher, rebuild);
        } finally {
            if (searcher != null && searcher.getIndexReader().getRefCount() > 0) {
                LOG.info("-- closing " + searcher);
                searcher.close();
            }
            if (source != null) {
                source.close();
            }
        }
        return parallel;
    }

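    /**
     * Builds the sidecar index as a sibling of the main index directory (or under
     * java.io.tmpdir for non-FSDirectory setups). For every document in the main
     * index it looks up the matching document in the source collection by
     * docIdField and writes either that document or a dummy placeholder, committing
     * at the original segment boundaries so the sidecar reader ends up with the
     * same structure as the main reader. On any failure it degrades to the main
     * index only.
     */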
    DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
        try {
            if (source == null) {
                throw new Exception("Source collection is missing.");
            }
            // create as a sibling path of the main index
            Directory d = main.directory();
            File primaryDir = null;
            if (d instanceof FSDirectory) {
                String path = ((FSDirectory) d).getDirectory().getPath();
                primaryDir = new File(path);
                sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
            } else {
                String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
                        + "-" + System.currentTimeMillis();
                sidecarIndex = new File(secondaryPath);
            }
            // create a new tmp dir for the secondary indexes
            File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
            if (rebuild) {
                safeDelete(sidecarIndex);
            }
            parallelFields.addAll(source.getFieldNames());
            parallelFields.remove("id");
            LOG.debug("building a new index");
            Directory dir = FSDirectory.open(secondaryIndex);
            if (IndexWriter.isLocked(dir)) {
                // try forcing unlock
                try {
                    IndexWriter.unlock(dir);
                } catch (Exception e) {
                    LOG.warn("Failed to unlock " + secondaryIndex);
                }
            }
            int[] mergeTargets;
            AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
            if (subReaders == null || subReaders.length == 0) {
                mergeTargets = new int[] { main.maxDoc() };
            } else {
                mergeTargets = new int[subReaders.length];
                for (int i = 0; i < subReaders.length; i++) {
                    mergeTargets[i] = subReaders[i].maxDoc();
                }
            }
            Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
            IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
            //cfg.setInfoStream(System.err);
            cfg.setMergeScheduler(new SerialMergeScheduler());
            cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
            IndexWriter iw = new IndexWriter(dir, cfg);
            LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
            int boostedDocs = 0;
            Bits live = MultiFields.getLiveDocs(main);

            int targetPos = 0;
            int nextTarget = mergeTargets[targetPos];
            BytesRef idRef = new BytesRef();
            for (int i = 0; i < main.maxDoc(); i++) {
                if (i == nextTarget) {
                    iw.commit();
                    nextTarget = nextTarget + mergeTargets[++targetPos];
                }
                if (live != null && !live.get(i)) {
                    addDummy(iw); // this is required to preserve doc numbers.
                    continue;
                } else {
                    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                    main.document(i, visitor);
                    Document doc = visitor.getDocument();
                    // get docId
                    String id = doc.get(docIdField);
                    if (id == null) {
                        LOG.debug("missing id, docNo=" + i);
                        addDummy(iw);
                        continue;
                    } else {
                        // find the data, if any
                        doc = lookup(source, id, idRef, parallelFields);
                        if (doc == null) {
                            LOG.debug("missing boost data, docId=" + id);
                            addDummy(iw);
                            continue;
                        } else {
                            LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                            iw.addDocument(doc);
                            boostedDocs++;
                        }
                    }
                }
            }
            iw.close();
            DirectoryReader other = DirectoryReader.open(dir);
            LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
            SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
            return pr;
        } catch (Exception e) {
            LOG.warn("Unable to build parallel index: " + e.toString(), e);
            LOG.warn("Proceeding with single main index.");
            try {
                return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                        sourceCollection, null);
            } catch (Exception e1) {
                LOG.warn("Unexpected exception, returning single main index", e1);
                return main;
            }
        }
    }

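    /**
     * Joins the main and sidecar readers with a ParallelCompositeReader, takes over
     * references to the resulting atomic sub-readers, and hands them to a new
     * SidecarIndexReader.
     */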
    private SidecarIndexReader createSidecarIndexReader(DirectoryReader main, DirectoryReader sidecar,
            String sourceCollection, File secondaryIndex) throws IOException {
        ParallelCompositeReader parallel = new ParallelCompositeReader(false, main, sidecar);
        AtomicReader[] parReaders = SidecarIndexReader.getSequentialSubReaders(parallel);
        AtomicReader[] readers = Arrays.copyOf(parReaders, parReaders.length, AtomicReader[].class);
        for (AtomicReader reader : readers) {
            reader.incRef();
        }
        parallel.close();
        SidecarIndexReader pr = new SidecarIndexReader(this, main,
                SidecarIndexReader.getSequentialSubReaders(sidecar), readers, sourceCollection, secondaryIndex);
        return pr;
    }

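    /** Writes a placeholder document so that doc numbers stay aligned with the main index. */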
    private void addDummy(IndexWriter iw) throws IOException {
        Document dummy = new Document();
        Field f = new Field("_" + boostField, "d", StringField.TYPE_NOT_STORED);
        dummy.add(f);
        iw.addDocument(dummy);
    }

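    /**
     * Fetches the boost document with the given id from the source collection,
     * returning only the requested fields (minus "id"), or null when there is no
     * match.
     */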
    private Document lookup(SolrIndexSearcher lookup, String id, BytesRef idRef, Set<String> fields)
            throws IOException {
        idRef.copyChars(id);
        Term t = new Term("id", idRef);
        if (lookup.docFreq(t) == 0) {
            return null;
        }

        int docId = lookup.getFirstMatch(t);
        if (docId == -1) {
            return null;
        }
        Document doc = lookup.doc(docId, fields);
        if (doc == null) {
            return null;
        }
        doc.removeFields("id");
        return doc;
    }

    // Best-effort recursive delete: files that cannot be removed now (e.g. still
    // locked by an open reader) are scheduled for deletion on JVM exit.
    boolean safeDelete(File f) {
        if (f.isDirectory()) {
            File[] files = f.listFiles();
            boolean res = true;
            for (File f1 : files) {
                if (!safeDelete(f1)) {
                    res = false;
                    f1.deleteOnExit();
                }
            }
            if (!f.delete()) {
                f.deleteOnExit();
                res = false;
            }
            return res;
        }
        try {
            boolean res = f.delete();
            if (!res) {
                f.deleteOnExit();
            }
            return res;
        } catch (Exception e) {
            LOG.warn("Can't delete old sidecar indexes: " + e.getMessage());
            return false;
        }
    }

    public String getSourceCollection() {
        return sourceCollection;
    }

    public String getBoostField() {
        return boostField;
    }

    public String getDocIdField() {
        return docIdField;
    }

    public boolean isEnabled() {
        return enabled;
    }
}
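
As a companion to the listing, the short standalone sketch below illustrates the Lucene 4.x mechanism the factory builds on: two indexes whose documents and segment layout line up one-to-one can be viewed as a single index through a ParallelCompositeReader, which is what buildParallelReader() arranges (with dummy documents and a custom merge policy) for a full Solr core. The class name, field names and values here are illustrative assumptions, not part of the original code.

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.ParallelCompositeReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class ParallelReaderSketch {
    public static void main(String[] args) throws Exception {
        Directory mainDir = new RAMDirectory();
        Directory sideDir = new RAMDirectory();

        // Main index: two documents carrying only an "id" field.
        IndexWriter mainWriter = new IndexWriter(mainDir,
                new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));
        for (String id : new String[] { "doc-1", "doc-2" }) {
            Document doc = new Document();
            doc.add(new StringField("id", id, Store.YES));
            mainWriter.addDocument(doc);
        }
        mainWriter.close();

        // Sidecar index: one document per main document, in the same order,
        // carrying only the extra "boost" field.
        IndexWriter sideWriter = new IndexWriter(sideDir,
                new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));
        for (String boost : new String[] { "2.0", "5.0" }) {
            Document doc = new Document();
            doc.add(new StringField("boost", boost, Store.YES));
            sideWriter.addDocument(doc);
        }
        sideWriter.close();

        // Because doc numbers and segment structure match, the two indexes can be
        // combined and read as one.
        DirectoryReader main = DirectoryReader.open(mainDir);
        DirectoryReader side = DirectoryReader.open(sideDir);
        ParallelCompositeReader parallel = new ParallelCompositeReader(main, side);
        for (int i = 0; i < parallel.maxDoc(); i++) {
            Document merged = parallel.document(i);
            System.out.println(merged.get("id") + " -> boost=" + merged.get("boost"));
        }
        parallel.close(); // also closes the wrapped readers
    }
}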