com.helger.pd.indexer.lucene.PDLucene.java Source code

Introduction

Here is the source code for com.helger.pd.indexer.lucene.PDLucene.java
Source

/**
 * Copyright (C) 2015-2016 Philip Helger (www.helger.com)
 * philip[at]helger[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.helger.pd.indexer.lucene;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.helger.commons.callback.IThrowingCallable;
import com.helger.commons.callback.IThrowingRunnable;
import com.helger.commons.io.stream.StreamHelper;
import com.helger.commons.state.ESuccess;
import com.helger.photon.basic.app.io.WebFileIO;

/**
 * The singleton wrapper around the Lucene index to be used in PEPPOL Directory.
 * <p>
 * The wrapper owns the index {@link Directory}, the {@link Analyzer} and the
 * {@link IndexWriter}. The index reader and the searcher are created on demand
 * and are replaced when the index content changed. Changes made through the
 * writer are committed lazily, the next time a reader is requested (or when
 * the wrapper is closed).
 * <p>
 * NOTE(review): the cached reader/searcher fields are not synchronized by this
 * class itself. Callers are presumably expected to perform all index access
 * inside {@link #runAtomic(IThrowingRunnable)} or
 * {@link #callAtomic(IThrowingCallable)} - confirm against the callers.
 *
 * @author Philip Helger
 */
public final class PDLucene implements Closeable, ILuceneDocumentProvider, ILuceneAnalyzerProvider {
    private static final Logger s_aLogger = LoggerFactory.getLogger(PDLucene.class);

    /** Lock guarding the atomic sections and the shutdown sequence. */
    private final Lock m_aLock = new ReentrantLock();
    private final Directory m_aDir;
    private final Analyzer m_aAnalyzer;
    private final IndexWriter m_aIndexWriter;
    /** Lazily opened reader on the last committed index state. */
    private DirectoryReader m_aIndexReader;
    /** The reader the cached searcher was created for. */
    private IndexReader m_aSearchReader;
    /** Cached searcher; only valid as long as the reader did not change. */
    private IndexSearcher m_aSearcher;
    /** Set to <code>true</code> as soon as {@link #close()} was invoked. */
    private final AtomicBoolean m_aClosing = new AtomicBoolean(false);
    /** Number of writer operations that were not yet committed. */
    private final AtomicInteger m_aWriterChanges = new AtomicInteger(0);

    /**
     * @return The directory in which the Lucene index files are stored. It is
     *         resolved as "lucene-index" relative to the data IO of
     *         {@link WebFileIO}.
     */
    @Nonnull
    public static File getLuceneIndexDir() {
        return WebFileIO.getDataIO().getFile("lucene-index");
    }

    /**
     * @return A new instance of the analyzer used for this index. Never
     *         <code>null</code>.
     */
    @Nonnull
    public static Analyzer createAnalyzer() {
        return new StandardAnalyzer();
    }

    /**
     * Open the index directory and create the index writer. Reader and searcher
     * are opened on demand.
     *
     * @throws IOException
     *         If the directory or the index writer cannot be opened. In case the
     *         writer cannot be created, the already opened directory is closed
     *         again.
     */
    public PDLucene() throws IOException {
        // Where to store the index files
        final Path aPath = getLuceneIndexDir().toPath();
        m_aDir = FSDirectory.open(aPath);

        // Analyzer to use
        m_aAnalyzer = createAnalyzer();

        // Create the index writer
        final IndexWriterConfig aWriterConfig = new IndexWriterConfig(m_aAnalyzer);
        aWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        try {
            m_aIndexWriter = new IndexWriter(m_aDir, aWriterConfig);
        } catch (final IOException | RuntimeException ex) {
            // Do not leak the directory if the writer cannot be created
            StreamHelper.close(m_aDir);
            throw ex;
        }

        // Reader and searcher are opened on demand

        s_aLogger.info("Lucene index operating on {}", aPath);
    }

    /**
     * Commit pending writer changes and close reader, writer and directory.
     * Only the first invocation has an effect; all further calls return
     * immediately.
     *
     * @throws IOException
     *         If committing the pending changes fails. Writer and directory are
     *         closed nevertheless.
     */
    @Override
    public void close() throws IOException {
        // Avoid double closing
        if (m_aClosing.getAndSet(true)) {
            return;
        }

        m_aLock.lock();
        try {
            // Start closing
            StreamHelper.close(m_aIndexReader);

            try {
                // Ensure to commit the writer in case of pending changes
                if (m_aIndexWriter.isOpen()) {
                    m_aIndexWriter.commit();
                }
            } finally {
                // Must also happen if the commit failed - otherwise writer and
                // directory stay open
                StreamHelper.close(m_aIndexWriter);
                StreamHelper.close(m_aDir);
            }
            s_aLogger.info("Closed Lucene reader/writer/directory");
        } finally {
            m_aLock.unlock();
        }
    }

    /**
     * @return <code>true</code> if {@link #close()} was already invoked.
     */
    public boolean isClosing() {
        return m_aClosing.get();
    }

    /**
     * Ensure the index is not shutting down.
     *
     * @throws IllegalStateException
     *         if {@link #close()} was already invoked
     */
    private void _checkClosing() {
        if (isClosing()) {
            throw new IllegalStateException("The Lucene index is shutting down so no access is possible");
        }
    }

    /**
     * @return The analyzer to be used for all Lucene based actions
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    @Nonnull
    public Analyzer getAnalyzer() {
        _checkClosing();
        return m_aAnalyzer;
    }

    /**
     * @return The index writer. Never <code>null</code>.
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    @Nonnull
    private IndexWriter _getWriter() {
        _checkClosing();
        return m_aIndexWriter;
    }

    /**
     * Get an up-to-date reader. Pending writer changes are committed first. If
     * the index changed since the current reader was opened, a new reader is
     * opened, the cached searcher is discarded and the superseded reader is
     * closed. Searchers created on a superseded reader must therefore not be
     * used any longer.
     *
     * @return <code>null</code> if no index exists yet.
     * @throws IOException
     *         On IO error
     */
    @Nullable
    private DirectoryReader _getReader() throws IOException {
        _checkClosing();
        try {
            // Commit the writer changes only if a reader is requested
            final int nPendingChanges = m_aWriterChanges.intValue();
            if (nPendingChanges > 0) {
                s_aLogger.info("Lazily committing {} changes to the Lucene index", Integer.valueOf(nPendingChanges));
                _getWriter().commit();
                // Only subtract what was counted before the commit, so that
                // changes counted in the meantime are not forgotten
                m_aWriterChanges.addAndGet(-nPendingChanges);
            }

            // Is a new reader required because the index changed?
            final DirectoryReader aOldReader = m_aIndexReader;
            final DirectoryReader aNewReader;
            if (aOldReader != null) {
                aNewReader = DirectoryReader.openIfChanged(aOldReader);
            } else {
                aNewReader = DirectoryReader.open(m_aDir);
            }

            if (aNewReader != null) {
                // Something changed in the index
                m_aIndexReader = aNewReader;
                m_aSearchReader = null;
                m_aSearcher = null;

                if (s_aLogger.isDebugEnabled()) {
                    s_aLogger.debug("Contents of index changed. Creating new index reader");
                }

                // openIfChanged does not close the reader it was called on
                StreamHelper.close(aOldReader);
            }
            return m_aIndexReader;
        } catch (final IndexNotFoundException ex) {
            // No such index
            return null;
        }
    }

    /**
     * Get the Lucene {@link Document} matching the specified ID
     *
     * @param nDocID
     *        Document ID
     * @return <code>null</code> if no reader could be obtained. Otherwise the
     *         result of {@link IndexReader#document(int)} for the passed ID.
     * @throws IOException
     *         On IO error
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    @Nullable
    public Document getDocument(final int nDocID) throws IOException {
        _checkClosing();

        if (s_aLogger.isDebugEnabled()) {
            s_aLogger.debug("getDocument({})", Integer.valueOf(nDocID));
        }

        final IndexReader aReader = _getReader();
        return aReader == null ? null : aReader.document(nDocID);
    }

    /**
     * Get a searcher on this index. The searcher is cached and only recreated
     * if the underlying reader changed.
     *
     * @return <code>null</code> if no reader or no searcher could be obtained
     * @throws IOException
     *         On IO error
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    @Nullable
    public IndexSearcher getSearcher() throws IOException {
        _checkClosing();
        final IndexReader aReader = _getReader();
        if (aReader == null) {
            // Index not readable
            return null;
        }

        // Create new searcher only if necessary
        if (m_aSearcher == null || m_aSearchReader != aReader) {
            m_aSearchReader = aReader;
            m_aSearcher = new IndexSearcher(aReader);
        }
        return m_aSearcher;
    }

    /**
     * Updates a document by first deleting the document(s) containing
     * <code>term</code> and then adding the new document. The delete and then add
     * are atomic as seen by a reader on the same index (flush may happen only
     * after the add). The change is committed lazily.
     *
     * @param aDelTerm
     *        the term to identify the document(s) to be deleted. May be
     *        <code>null</code>.
     * @param aDoc
     *        the document to be added May not be <code>null</code>.
     * @throws CorruptIndexException
     *         if the index is corrupt
     * @throws IOException
     *         if there is a low-level IO error
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    public void updateDocument(@Nullable final Term aDelTerm,
            @Nonnull final Iterable<? extends IndexableField> aDoc) throws IOException {
        _getWriter().updateDocument(aDelTerm, aDoc);
        m_aWriterChanges.incrementAndGet();
    }

    /**
     * Deletes the documents matching the provided delTerm and adds a block of
     * documents. This is deliberately performed as two separate writer
     * operations (delete and then add). The change is committed lazily, the
     * next time a reader is requested.
     *
     * @param aDelTerm
     *        the term to identify the document(s) to be deleted. May be
     *        <code>null</code> in which case nothing is deleted.
     * @param aDocs
     *        the documents to be added. May not be <code>null</code>.
     * @throws CorruptIndexException
     *         if the index is corrupt
     * @throws IOException
     *         if there is a low-level IO error
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    public void updateDocuments(@Nullable final Term aDelTerm,
            @Nonnull final Iterable<? extends Iterable<? extends IndexableField>> aDocs) throws IOException {
        final IndexWriter aWriter = _getWriter();

        // Delete and then add
        if (aDelTerm != null) {
            // A null term must not end up in the varargs array of deleteDocuments
            aWriter.deleteDocuments(aDelTerm);
        }
        aWriter.updateDocuments((Term) null, aDocs);
        m_aWriterChanges.incrementAndGet();
    }

    /**
     * Deletes the document(s) containing any of the terms. All given deletes are
     * applied and flushed atomically at the same time. The change is committed
     * lazily.
     *
     * @param aTerms
     *        array of terms to identify the documents to be deleted
     * @throws CorruptIndexException
     *         if the index is corrupt
     * @throws IOException
     *         if there is a low-level IO error
     * @throws IllegalStateException
     *         if the index is shutting down
     */
    public void deleteDocuments(final Term... aTerms) throws IOException {
        _getWriter().deleteDocuments(aTerms);
        m_aWriterChanges.incrementAndGet();
    }

    /**
     * Run the provided action within a locked section.
     *
     * @param aRunnable
     *        Callback to be executed
     * @return {@link ESuccess#FAILURE} if the index is just closing
     * @throws IOException
     *         may be thrown by the callback
     */
    @Nonnull
    public ESuccess runAtomic(@Nonnull final IThrowingRunnable<IOException> aRunnable) throws IOException {
        m_aLock.lock();
        try {
            if (isClosing()) {
                return ESuccess.FAILURE;
            }
            aRunnable.run();
            return ESuccess.SUCCESS;
        } finally {
            m_aLock.unlock();
        }
    }

    /**
     * Run the provided action within a locked section.<br>
     * Note: because of a problem with JDK 1.8.60 (+) command line compiler, this
     * method uses type "Exception" instead of "IOException" in the parameter
     * signature
     *
     * @param aRunnable
     *        Callback to be executed.
     * @return <code>null</code> if the index is just closing, otherwise the
     *         result of the callback
     * @throws IOException
     *         may be thrown by the callback
     * @throws IllegalStateException
     *         if the callback throws a checked exception other than
     *         {@link IOException}. Runtime exceptions of the callback are
     *         propagated unchanged.
     * @param <T>
     *        Result type
     */
    @Nullable
    public <T> T callAtomic(@Nonnull final IThrowingCallable<T, Exception> aRunnable) throws IOException {
        m_aLock.lock();
        try {
            if (isClosing()) {
                return null;
            }
            return aRunnable.call();
        } catch (final IOException | RuntimeException ex) {
            // Propagate as is
            throw ex;
        } catch (final Exception ex) {
            // Never silently swallow an unexpected checked exception
            throw new IllegalStateException("Unexpected checked exception in atomic Lucene action", ex);
        } finally {
            m_aLock.unlock();
        }
    }
}