vtk.repository.index.IndexManager.java Source code

Introduction

Here is the source code for vtk.repository.index.IndexManager.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package vtk.repository.index;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Required;
import vtk.util.threads.Mutex;

/**
 * Lower level management of single Lucene index.
 * 
 * <p>Handles the following, more or less:
 * <ul>
 *   <li>Access to <code>IndexSearcher</code> and <code>IndexWriter</code> instances.
 *   <li>Management of index searcher instances for refresh and warming.
 *   <li>Index life-cycle (initialization/open/close) and commit.
 *   <li>Index storage details.
 *   <li>Optional mutex-locking for exclusive write access between threads that modify
 * the index.
 * </ul>
 * 
 * <p>Configurable properties:
 * <ul>
 *   <li><code>indexPath</code> - absolute path to file system directory where index should be created.
 *   <li>TODO complete me.
 * </ul>
 * 
 * TODO we should support a lazy reader with longer time between refreshes for efficient
 * anonymous queries which do not requre completely up-to-date results.
 */
public class IndexManager implements InitializingBean, DisposableBean {

    private final Log logger = LogFactory.getLog(IndexManager.class.getName());

    private File storageRootPath;
    private String storageId;
    private boolean batchIndexingMode = false;
    private int maxLockAcquireTimeOnShutdown = 30; // 30 seconds max to wait for mutex lock when shutting down
    private boolean forceUnlock = true;
    private boolean closeAfterInit = false;

    // Lucene directory abstraction
    private volatile Directory directory;

    // The IndexWriter instance used to modify the index.
    private IndexWriter writer;

    // Manages searching and general reading of index
    private SearcherManager searcherManager;

    // Searcher factory is used to create new IndexSearcher instances in SearcherManager
    private SearcherFactory searcherFactory;

    // Internal mutex lock backing the public locking functions of this class.
    private final Mutex lock = new Mutex();

    /**
     * Open the underlying index for writing and searching.
     * @throws IOException in case of errors with index or storage.
     */
    public synchronized void open() throws IOException {
        open(false);
    }

    /**
     * Open the underlying index for writing and searching, optionally specify
     * if a new index should be created at the time of opening.
     * 
     * @param createNewIndex if <code>true</code>, then any existing index at the
     * storage location is cleared and a new and empty index is created. Use
     * with caution.
     * @throws IOException in case of errors with index or storage.
     */
    public synchronized void open(boolean createNewIndex) throws IOException {
        if (!isClosed()) {
            return;
        }

        if (storageRootPath != null && storageId != null) {
            directory = makeDirectory(initStorage(storageRootPath, storageId));
        } else {
            directory = makeDirectory(null);
        }

        checkIndexLock(directory);

        initIndex(directory, createNewIndex);

        writer = new IndexWriter(directory, newIndexWriterConfig());

        // For Lucene NRT (Near Real Time) searching, the writer instance could be provided to
        // the searcher factory here. However, due to how we update documents, it is
        // undesirable to let searches see uncomitted index changes. So we simply
        // don't use NRT.
        searcherManager = new SearcherManager(directory, searcherFactory);
    }

    /**
     * Close the index to free resources (I/O and memory). After this method
     * has returned, the index must be {@link #open() opened} again to be used.
     * 
     * Calling this method on an already closed index will have no effect, and
     * will not produce any errors.
     * 
     * @throws IOException in case of errors closing down.
     */
    public synchronized void close() throws IOException {
        if (searcherManager != null) {
            searcherManager.close();
        }

        if (writer != null) {
            writer.close();
            writer = null;
        }

        if (directory != null) {
            directory.close();
            directory = null;
        }

        // Unset searchManager last after nullifying directory to avoid NPE for
        // concurrent searching while this close method runs.
        searcherManager = null;
    }

    /**
     * Check whether the index is currently closed.
     * @return <code>true</code> if the index is closed.
     */
    public boolean isClosed() {
        return directory == null;
    }

    /**
     * Commit all changes made through the provided {@link #getIndexWriter() IndexWriter }
     * instance and refresh readers for searching.
     * 
     * This call will block until all changes are flushed to index and reader
     * instances have been refreshed.
     * 
     * @throws IOException in case of errors comitting the changes or if index is closed.
     */
    public synchronized void commit() throws IOException {
        if (isClosed()) {
            throw new IOException("Index is closed");
        }

        writer.commit();
        searcherManager.maybeRefreshBlocking();
    }

    /**
     * Get access to {@link IndexWriter} instance.
     * 
     * @return a (shared) <code>IndexWriter</code> instance.
     * @throws IOException in case the index is closed
     */
    public synchronized IndexWriter getIndexWriter() throws IOException {
        if (isClosed()) {
            throw new IOException("Index is closed");
        }

        return writer;
    }

    /**
     * Obtain an index searcher.
     * 
     * You should release the obtained searcher after use in a finally block, by calling
     * {@link #releaseIndexSearcher(org.apache.lucene.search.IndexSearcher) }.
     * 
     * @return 
     * @throws IOException 
     */
    public IndexSearcher getIndexSearcher() throws IOException {
        if (isClosed()) {
            throw new IOException("Index is closed");
        }

        // Guard against possible NPE if index is being closed at the same time
        // this method is called
        SearcherManager sm = searcherManager;
        if (sm != null) {
            return sm.acquire();
        } else {
            throw new IOException("Index is closed");
        }
    }

    /**
     * Release a search previously obtained with {@link #getIndexSearcher() }. This
     * is necessary to free resources and should be done in a <code>finally</code> block
     * whenever a searcher is used.
     * 
     * @param searcher the index searcher. May be <code>null</code>, and in that
     * case the call does nothing.
     * 
     * @throws IOException in case of errors with index
     */
    public void releaseIndexSearcher(IndexSearcher searcher) throws IOException {
        if (searcher == null)
            return;

        // Guard against possible NPE at closing time
        SearcherManager sm = searcherManager;
        if (sm != null) {
            sm.release(searcher);
        }

        logger.debug("searcher.getIndexReader().getRefCount() = " + searcher.getIndexReader().getRefCount());
    }

    /** Check index filesystem-lock, force-unlock if requested. */
    private void checkIndexLock(Directory directory) throws IOException {
        if (IndexWriter.isLocked(directory)) {
            // See if we should try to force-unlock it
            if (forceUnlock) {
                logger.warn("Index directory is locked, forcing unlock.");
                IndexWriter.unlock(directory);
            } else {
                throw new IOException(
                        "Index directory " + directory + " is locked and 'forceUnlock' is set to 'false'.");
            }
        }
    }

    private void initIndex(Directory directory, boolean createNew) throws IOException {
        if (!DirectoryReader.indexExists(directory) || createNew) {
            IndexWriterConfig conf = newIndexWriterConfig();
            conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            new IndexWriter(directory, conf).close();
            logger.info("Created new index in directory " + directory);
        }
    }

    private File initStorage(File storageRootPath, String storageId) throws IOException {

        File storageDirectory = new File(storageRootPath, storageId);

        if (storageDirectory.isDirectory()) {
            if (!storageDirectory.canWrite()) {
                throw new IOException(
                        "Resolved storage directory '" + storageDirectory.getAbsolutePath() + "' is not writable");
            }
        } else if (storageDirectory.isFile()) {
            throw new IOException(
                    "Resolved storage directory '" + storageDirectory.getAbsolutePath() + "' is a file");
        } else {
            // Directory does not exist, we need to create it.
            if (!storageDirectory.mkdir()) {
                throw new IOException(
                        "Failed to create resolved storage directory '" + storageDirectory.getAbsolutePath() + "'");
            }
        }

        return storageDirectory;
    }

    private Directory makeDirectory(File path) throws IOException {
        if (path != null) {
            return FSDirectory.open(path);
        } else {
            logger.warn("No storage path provided, using volatile memory index.");
            return new RAMDirectory();
        }
    }

    private IndexWriterConfig newIndexWriterConfig() {
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LATEST, new KeywordAnalyzer());
        cfg.setMaxThreadStates(1); // We have only at most one writing thread.

        // XXX switch to LogByteSizeMergePolicy if problems with (default) TieredMergePolicy arise.
        //        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        //        mp.setMergeFactor(this.batchIndexingMode ? 25: 5);
        //        cfg.setMergePolicy(mp);

        cfg.setRAMBufferSizeMB(batchIndexingMode ? 32.0 : IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
        return cfg;
    }

    /**
     * Execute a thorough low-level index check. Can be time consuming.
     * 
     * This method should only be called with mutex lock acquired, to ensure
     * directory is not modified while check is running. 
     * @return <code>true</code> if no problems were found with the index, <code>false</code>
     * if problems were detected. Details of any problems are not available, but
     * a rebuild is likely wise to do if this method returns <code>false</code>.
     * @throws IOException in case of errors during check or if index is closed.
     * Assume the index is corrupt if this occurs for any other reason than index
     * being closed.
     */
    public boolean checkIndex() throws IOException {
        if (isClosed()) {
            throw new IOException("Index is closed");
        }

        CheckIndex ci = new CheckIndex(directory);
        CheckIndex.Status status = ci.checkIndex();
        return status.clean;
    }

    /**
     * Explicit locking. Should be acquired before doing any write or life cycle
     * operations on index. This locking records no thread ownership and is
     * present merely as a tool for the caller to ensure exclusive index access.
     */
    public boolean lockAcquire() {
        if (this.lock.lock()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Thread '" + Thread.currentThread().getName() + "' got lock on index '"
                        + this.storageId + "'.");
            }
            return true;
        }

        return false;
    }

    /**
     * Explicit write locking with timeout.
     * 
     * @param timeout timeout in milliseconds
     * @return <code>true</code> if lock was successfully obtained, <code>false</code>
     * otherwise.
     * 
     * @see #lockAcquire() 
     */
    public boolean lockAttempt(long timeout) {
        if (this.lock.tryLock(timeout, TimeUnit.MILLISECONDS)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Thread '" + Thread.currentThread().getName() + "' got lock on index '"
                        + this.storageId + "'.");
            }
            return true;

        } else if (logger.isDebugEnabled()) {
            logger.debug("Thread '" + Thread.currentThread().getName() + "' failed to acquire lock on index '"
                    + this.storageId + "' after waiting for " + timeout + " ms");
        }

        return false;
    }

    /**
     * Release explicit write lock.
     */
    public void lockRelease() {
        this.lock.unlock();
        if (logger.isDebugEnabled()) {
            logger.debug("Thread '" + Thread.currentThread().getName() + "' unlocked write lock on index '" + this
                    + "'.");
        }
    }

    /**
     * 
     * @return storage id as a string
     */
    public String getStorageId() {
        return storageId;
    }

    // Framework life-cycle
    @Override
    public void afterPropertiesSet() throws IOException {
        open();
        if (closeAfterInit) {
            close();
        }
    }

    // Framework life-cycle
    @Override
    public void destroy() throws Exception {
        logger.info("Index shutdown, waiting for write lock on index '" + this.storageId + "' ..");
        if (lockAttempt(this.maxLockAcquireTimeOnShutdown * 1000)) {
            logger.info("Got write lock on index '" + this.storageId + "', closing down.");

            close();
        } else {
            logger.warn("Failed to acquire the write lock on index '" + this.storageId + "' within "
                    + " the time limit of " + this.maxLockAcquireTimeOnShutdown
                    + " seconds, index might be corrupted.");
        }
    }

    /**
     * Set whether the underlying index should be explicitly closed
     * after the <em>first</em> initialization is complete. Can be used for
     * index instances you don't want to have open before it is  needed.
     * Such an instance must be initialized again before it can be used
     * (by calling {@link #open() }). 
     * @param closeAfterInit <code>true</code> if index should be closed after
     * initialization is complete.
     */
    public void setCloseAfterInit(boolean closeAfterInit) {
        this.closeAfterInit = closeAfterInit;
    }

    /**
     * Set the {@link SearcherFactory} to be used for creating new {@link IndexSearcher}
     * instances. (Provide a factory that does warmup for better performance
     * after write operations.)
     * 
     * @param searcherFactory the <code>SearcherFactory</code> instance.
     */
    public void setSearcherFactory(SearcherFactory searcherFactory) {
        this.searcherFactory = searcherFactory;
    }

    /**
     * Set the storage id of this index. The storage id is the name of the index
     * directory created under the storage root path. Thus the provided id should
     * be file name friendly and not contain for instance a slash character.
     * 
     * @param storageId the storage Id as a string.
     */
    @Required
    public void setStorageId(String storageId) {
        this.storageId = storageId;
    }

    @Required
    public void setStorageRootPath(File rootPath) {
        this.storageRootPath = rootPath;
    }

    /**
     * @param batchIndexingMode the batchIndexingMode to set
     */
    public void setBatchIndexingMode(boolean batchIndexingMode) {
        this.batchIndexingMode = batchIndexingMode;
    }

    /**
     * @param maxLockAcquireTimeOnShutdown the maxLockAcquireTimeOnShutdown to set
     */
    public void setMaxLockAcquireTimeOnShutdown(int maxLockAcquireTimeOnShutdown) {
        this.maxLockAcquireTimeOnShutdown = maxLockAcquireTimeOnShutdown;
    }

    /**
     * @param forceUnlock the forceUnlock to set
     */
    public void setForceUnlock(boolean forceUnlock) {
        this.forceUnlock = forceUnlock;
    }

}