org.mulgara.resolver.lucene.LuceneIndexerCache.java Source code

Java tutorial

Introduction

Here is the source code for org.mulgara.resolver.lucene.LuceneIndexerCache.java

Source

/*
 * Copyright 2008 The Topaz Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 * Contributions:
 */

package org.mulgara.resolver.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.mulgara.util.io.MappingUtil;

/**
 * A cache of lucene index-readers and index-writers. Opening a lucene index-reader or writer is
 * fairly expensive, so caching them can provide substantial performance gains. No cache-expiry
 * has been implemented, however; the assumption is that there will be a limited number of lucene
 * models.
 *
 * <p>This also manages the setting up and removal of the index directory.
 *
 * <p>All methods that touch the pools ({@link #getReader}, {@link #getWriter},
 * {@link #returnReader}, {@link #returnWriter}, {@link #indexModified},
 * {@link #removeAllIndexes} and {@link #close}) are synchronized on this instance.
 *
 * @created 2008-09-28
 * @author Ronald Tschalr
 * @licence Apache License v2.0
 */
public class LuceneIndexerCache {

    private static final Logger logger = Logger.getLogger(LuceneIndexerCache.class);

    /** Number of additional attempts made by {@code delete} after the first deletion attempt fails. */
    private static final int MAX_RETRIES = 10;

    /* our caches */

    /** Pooled readers that are currently not handed out. */
    private final Stack<ReaderInfo> freeReaders = new Stack<ReaderInfo>();
    /** Pooled writers that are currently not handed out. */
    private final Stack<WriterInfo> freeWriters = new Stack<WriterInfo>();
    /** Readers currently handed out via {@link #getReader}, keyed by the reader itself. */
    private final Map<IndexReader, ReaderInfo> allocdReaders = new HashMap<IndexReader, ReaderInfo>();
    /** Writers currently handed out via {@link #getWriter}, keyed by the writer itself. */
    private final Map<IndexWriter, WriterInfo> allocdWriters = new HashMap<IndexWriter, WriterInfo>();

    /** The Directory for Lucene.  */
    private final FSDirectory luceneIndexDirectory;

    /** Whether this cache has been closed */
    private boolean closed = false;

    /**
     * Create a new cache.
     *
     * @param directory the directory to use for the indexes; it is created if it does not exist.
     * @throws IOException if the directory is not readable, writable, or some other error occurs
     *                     trying to access it
     */
    public LuceneIndexerCache(String directory) throws IOException {
        luceneIndexDirectory = FSDirectory.open(createOrValidateDirectory(directory));
        clearLocks();

        if (!IndexReader.indexExists(luceneIndexDirectory)) {
            if (logger.isDebugEnabled()) {
                logger.debug(
                        "Fulltext string index does not yet exist in directory '" + directory + "', creating it now.");
            }
            createIndex();
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Fulltext string indexer cache initialized; directory =" + directory);
        }
    }

    /**
     * Create an empty index by obtaining a writer and committing it immediately.
     *
     * @throws IOException if the commit fails; non-IO failures are wrapped in an IOException
     */
    private void createIndex() throws IOException {
        IndexWriter writer = getWriter();
        try {
            writer.commit();
        } catch (Throwable t) {
            // Hand the writer back with close=true instead of closing it directly: this also removes
            // it from the allocated map, and a failure while closing is logged rather than thrown,
            // so it cannot replace the commit failure being reported here.
            returnWriter(writer, true);
            throw (t instanceof IOException) ? (IOException) t : new IOException("Error creating new index", t);
        }
        returnWriter(writer, false);
    }

    /**
     * Make sure the given path exists, is a directory and is writeable, creating it if necessary.
     *
     * @param directory the path of the index directory
     * @return the directory as a File
     * @throws IOException if the path is not a directory or is not writeable
     */
    private static File createOrValidateDirectory(String directory) throws IOException {
        File indexDirectory = new File(directory);

        // make the directory if it doesn't exist; a failed mkdirs() is caught by the check below
        if (!indexDirectory.exists()) {
            indexDirectory.mkdirs();
        }

        // ensure the index directory is a directory
        if (!indexDirectory.isDirectory()) {
            logger.fatal("The fulltext string index directory '" + directory + "' is not a directory!");
            throw new IOException("The fulltext string index directory '" + directory + "' is not a directory!");
        }

        // ensure the directory is writeable
        if (!indexDirectory.canWrite()) {
            logger.fatal("The fulltext string index directory '" + directory + "' is not writeable!");
            throw new IOException("The fulltext string index directory '" + directory + "' is not writeable!");
        }

        return indexDirectory;
    }

    /**
     * Forcibly remove a lucene write lock on the index directory, if there is one.
     *
     * @throws IOException if the directory is still locked after the forced unlock
     */
    private void clearLocks() throws IOException {
        if (IndexWriter.isLocked(luceneIndexDirectory)) {
            /* This shouldn't happen unless mulgara was shut down abruptly since mulgara has a single
             * writer lock.
             */
            logger.warn("Fulltext index directory '" + luceneIndexDirectory + "' is locked; forcibly unlocking");
            IndexWriter.unlock(luceneIndexDirectory);
        }

        if (IndexWriter.isLocked(luceneIndexDirectory)) {
            throw new IOException("Fulltext index directory '" + luceneIndexDirectory + "' is locked; "
                    + "forced unlock failed; giving up");
        }
    }

    /**
     * Get an index-reader. It must be returned via {@link #returnReader}.
     *
     * @return the index-reader
     * @throws IOException if a reader could not be opened or refreshed
     * @throws IllegalStateException if this cache has been closed
     */
    public synchronized IndexReader getReader() throws IOException {
        if (closed)
            throw new IllegalStateException("IndexerCache has been closed: " + luceneIndexDirectory);

        ReaderInfo ri = freeReaders.pop();
        if (ri == null) {
            ri = new ReaderInfo(luceneIndexDirectory);

            if (logger.isDebugEnabled())
                logger.debug("Creating new index-reader: " + ri.reader);
        } else if (ri.needsRefresh) {
            IndexReader reader;
            try {
                reader = ri.reader.reopen();
            } catch (IOException ioe) {
                // The stale reader has already been taken out of the pool and will not be handed
                // out, so close it here; otherwise nothing would ever close it.
                try {
                    forceClose(ri.reader);
                } catch (IOException closeFailure) {
                    logger.warn("Error closing index-reader after failed refresh: " + ri.reader, closeFailure);
                }
                throw ioe;
            }

            if (reader != ri.reader) {
                forceClose(ri.reader);
                ri = new ReaderInfo(reader);

                if (logger.isDebugEnabled())
                    logger.debug("Refreshed index-reader: " + ri.reader);
            } else {
                // reopen() handed back the same instance, so the pooled reader is reused as is
                ri.needsRefresh = false;
            }
        } else {
            if (logger.isDebugEnabled())
                logger.debug("Reusing index-reader: " + ri.reader);
        }

        allocdReaders.put(ri.reader, ri);
        return ri.reader;
    }

    /**
     * Get an index-writer. It must be returned via {@link #returnWriter}.
     *
     * @return the index-writer
     * @throws IOException if a writer could not be opened, or a stale one could not be closed
     * @throws IllegalStateException if this cache has been closed
     */
    public synchronized IndexWriter getWriter() throws IOException {
        if (closed)
            throw new IllegalStateException("IndexerCache has been closed: " + luceneIndexDirectory);

        WriterInfo wi = freeWriters.pop();
        if (wi == null) {
            wi = new WriterInfo(luceneIndexDirectory);

            if (logger.isDebugEnabled())
                logger.debug("Created new index-writer: " + wi.writer);
        } else if (wi.needsRefresh) {
            // a stale writer is refreshed by closing it and opening a new one
            wi.writer.close();
            wi = new WriterInfo(luceneIndexDirectory);

            if (logger.isDebugEnabled())
                logger.debug("Refreshed index-writer: " + wi.writer);
        } else {
            if (logger.isDebugEnabled())
                logger.debug("Reusing index-writer: " + wi.writer);
        }

        allocdWriters.put(wi.writer, wi);
        return wi.writer;
    }

    /**
     * Return an index-reader to the cache. It must have been previously retrieved through {@link
     * #getReader}. A reader that is not currently allocated from this cache (e.g. one that is
     * returned twice) is never added to the pool.
     *
     * @param reader the reader to return
     * @param close if true the reader is closed and not returned to the pool
     */
    public synchronized void returnReader(IndexReader reader, boolean close) {
        ReaderInfo ri = allocdReaders.remove(reader);

        if (close || closed) {
            try {
                forceClose(reader);
                if (logger.isDebugEnabled())
                    logger.debug("Closed index-reader: " + reader);
            } catch (IOException ioe) {
                logger.warn("Error closing index-reader: " + reader, ioe);
            }
        } else if (ri == null) {
            // Pooling a null entry would make indexModified() and close() fail with a
            // NullPointerException when they iterate over the pool.
            logger.warn("Ignoring return of index-reader that is not allocated from this cache: " + reader);
        } else {
            freeReaders.push(ri);
            if (logger.isDebugEnabled())
                logger.debug("Returned index-reader: " + reader);
        }
    }

    /**
     * Return an index-writer to the cache. It must have been previously retrieved through {@link
     * #getWriter}. A writer that is not currently allocated from this cache (e.g. one that is
     * returned twice) is never added to the pool.
     *
     * @param writer the writer to return
     * @param close if true the writer is closed and not returned to the pool
     */
    public synchronized void returnWriter(IndexWriter writer, boolean close) {
        WriterInfo wi = allocdWriters.remove(writer);

        if (close || closed) {
            try {
                writer.close();
                if (logger.isDebugEnabled())
                    logger.debug("Closed index-writer: " + writer);
            } catch (IOException ioe) {
                logger.warn("Error closing index-writer: " + writer, ioe);
            }
        } else if (wi == null) {
            // Pooling a null entry would make indexModified() and close() fail with a
            // NullPointerException when they iterate over the pool.
            logger.warn("Ignoring return of index-writer that is not allocated from this cache: " + writer);
        } else {
            freeWriters.push(wi);
            if (logger.isDebugEnabled())
                logger.debug("Returned index-writer: " + writer);
        }
    }

    /**
     * Notify the cache that the index has been modified. All newly returned indexers will be
     * appropriately refreshed.
     *
     * @param writer the writer that made the modification; it is the only indexer that is not
     *               marked for refresh
     */
    public synchronized void indexModified(IndexWriter writer) {
        for (RefreshableObject ro : freeReaders)
            ro.needsRefresh = true;
        for (RefreshableObject ro : freeWriters)
            ro.needsRefresh = true;
        for (RefreshableObject ro : allocdReaders.values())
            ro.needsRefresh = true;
        for (WriterInfo wi : allocdWriters.values()) {
            if (wi.writer != writer)
                wi.needsRefresh = true;
        }

        if (logger.isDebugEnabled())
            logger.debug("All indexers marked for refresh");
    }

    /**
     * @return the directory being used for the indexes
     */
    public String getDirectory() {
        return luceneIndexDirectory.toString();
    }

    /**
     * Remove all index files from the current initialised directory. WARNING : All
     * files are removed in the specified directory. This is probably only useful for
     * testing. See {@link FullTextStringIndex#removeAll} for an alternate solution.
     *
     * @return return true if successful at removing all index files
     * @throws IOException if an exception occurs while attempting to delete the files
     */
    public synchronized boolean removeAllIndexes() throws IOException {
        if (allocdWriters.size() > 0 || allocdReaders.size() > 0) {
            logger.warn("Attempting to remove all indexes while readers or writers are still active");
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Removing all indexes from " + luceneIndexDirectory);
        }

        for (String file : luceneIndexDirectory.listAll()) {
            delete(luceneIndexDirectory, file);
        }

        return luceneIndexDirectory.getDirectory().delete();
    }

    /**
     * Close this cache. All pooled index readers/writers are closed; readers/writers that are still
     * in use will be closed upon being returned.
     */
    public synchronized void close() {
        if (allocdWriters.size() > 0 || allocdReaders.size() > 0) {
            logger.warn("Attempting to close indexer-cache while readers or writers are still active");
        }

        closed = true;

        for (ReaderInfo ri : freeReaders) {
            try {
                forceClose(ri.reader);
            } catch (IOException ioe) {
                logger.error("Error closing index-reader: " + ri.reader, ioe);
            }
        }
        freeReaders.clear(); // Let them be GC'ed. This is important for mapped files.

        for (WriterInfo wi : freeWriters) {
            try {
                wi.writer.close();
            } catch (IOException ioe) {
                logger.error("Error closing index-writer: " + wi.writer, ioe);
            }
        }
        freeWriters.clear(); // Let them be GC'ed. This is important for mapped files.

        if (logger.isDebugEnabled())
            logger.debug("IndexerCacher closed: " + luceneIndexDirectory);
    }

    /** Base for pool entries; carries the flag set by {@link LuceneIndexerCache#indexModified}. */
    private static abstract class RefreshableObject {
        public boolean needsRefresh = false;
    }

    /** Pool entry wrapping an index-reader. */
    private static class ReaderInfo extends RefreshableObject {
        public final IndexReader reader;

        /** Open a new read-only reader on the given directory. */
        public ReaderInfo(Directory directory) throws IOException {
            reader = IndexReader.open(directory, true);
        }

        /** Wrap an already opened reader. */
        public ReaderInfo(IndexReader reader) {
            this.reader = reader;
        }
    }

    /** Pool entry wrapping an index-writer. */
    private static class WriterInfo extends RefreshableObject {
        public final IndexWriter writer;

        /** Open a new writer on the given directory using a StandardAnalyzer. */
        public WriterInfo(Directory directory) throws IOException {
            Version v = LuceneResolver.LUCENE_VERSION;
            writer = new IndexWriter(directory, new IndexWriterConfig(v, new StandardAnalyzer(v)));
        }
    }

    /** A minimal LIFO stack on top of ArrayList whose {@link #pop} returns null when empty. */
    private static class Stack<T> extends ArrayList<T> {
        /** Serialization ID */
        private static final long serialVersionUID = -8597253123267228667L;

        /** Add the object to the top of the stack. */
        public void push(T obj) {
            add(obj);
        }

        /** @return the most recently pushed object, or null if the stack is empty */
        public T pop() {
            return size() > 0 ? remove(size() - 1) : null;
        }
    }

    /**
     * Attempt to force the deletion of a file. In the case of memory-mapped files, this might take a
     * few tries because mapped byte buffers aren't unmapped until they're garbage collected.
     *
     * @param dir the directory containing the file
     * @param file the name of the file to delete
     * @throws IOException the last deletion failure, once all retries have been used up
     */
    private static void delete(FSDirectory dir, String file) throws IOException {
        int retries = MAX_RETRIES;
        for (;;) {
            try {
                dir.deleteFile(file);
                break;
            } catch (IOException e) {
                if (retries-- == 0) {
                    logger.warn("Couldn't delete file '" + file + "' from directory " + dir.getDirectory()
                            + " after " + MAX_RETRIES + " retries", e);
                    throw e;
                }
                MappingUtil.systemCleanup();
            }
        }
    }

    /**
     * Force the reader closed by cleaning up outstanding references. The reader is closed even if
     * dropping the outstanding references fails.
     *
     * @param reader the reader to close
     * @throws IOException if decrementing the reference count or closing the reader fails
     */
    private static void forceClose(IndexReader reader) throws IOException {
        try {
            if (reader.getRefCount() > 1) {
                // This likely indicates a FullTextStringIndexTuples that was not properly closed.
                // Closing it now is likely to break any existing references to it.
                logger.warn("Forcing close of a reader that was returned to the cache with active references: "
                        + System.identityHashCode(reader));
                while (reader.getRefCount() > 1) {
                    reader.decRef();
                }
            }
        } catch (IOException e) {
            logger.error("Can't decrement reference count to abandoned reader", e);
            throw e;
        } finally {
            reader.close();
        }
    }
}