com.globalsight.ling.tm2.lucene.LuceneIndexWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.globalsight.ling.tm2.lucene.LuceneIndexWriter.java

Source

/**
 *  Copyright 2009 Welocalize, Inc. 
 *  
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  
 *  You may obtain a copy of the License at 
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 */
package com.globalsight.ling.tm2.lucene;

import org.apache.log4j.Logger;

import com.globalsight.util.FileUtil;
import com.globalsight.util.GlobalSightLocale;
import com.globalsight.ling.tm2.population.SegmentsForSave.AddTuv;
import com.globalsight.ling.tm2.BaseTmTuv;
import com.globalsight.ling.tm3.core.TM3Tuv;
import com.globalsight.ling.tm3.integration.GSTuvData;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Iterator;
import java.util.Collection;
import java.util.Set;
import java.util.HashSet;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.analysis.Analyzer;

/**
 * LuceneIndexWriter is responsible for indexing TM segments.
 */

public class LuceneIndexWriter {
    private static final Logger c_logger = Logger.getLogger(LuceneIndexWriter.class);

    static protected String LOCK_NAME = "gs.lock";

    static {
        // set lock timeout to 3 minutes
        //IndexWriter.WRITE_LOCK_TIMEOUT = 180000L;
        //IndexWriter.COMMIT_LOCK_TIMEOUT = 180000L;
    }

    private long m_tmId;
    private Analyzer m_analyzer;

    private Lock m_lock = null;
    private Directory m_directory;
    private File m_indexDir;
    private boolean m_isFirst = false;

    public LuceneIndexWriter(long p_tmId, GlobalSightLocale p_locale) throws Exception {
        this(p_tmId, p_locale, false);
    }

    /**
     * The constructor gets a lock on the index directory.  If the
     * directory doesn't exist yet, it is created. When an operation
     * is done, close() method must be called to remove the lock.
     * 
     * @param p_tmId TM id
     * @param p_locale locale of the index
     */
    public LuceneIndexWriter(long p_tmId, GlobalSightLocale p_locale, boolean p_isFirst) throws Exception {
        m_tmId = p_tmId;
        m_analyzer = new GsPerFieldAnalyzer(p_locale);
        m_isFirst = p_isFirst;

        m_indexDir = LuceneUtil.getGoldTmIndexDirectory(p_tmId, p_locale, true);

        // get the directory. Note that the directory cannot be
        // created before getting a lock. Note2:
        // FSDirectory.getDirectory(dir, true) doesn't really create
        // index files. It just clear out the old index files and lock
        // file.
        m_directory = FSDirectory.open(m_indexDir);

        // get a lock on the directory
        m_lock = m_directory.makeLock(LOCK_NAME);
        if (!m_lock.obtain(180000L)) {
            m_lock = null;
            throw new IOException("Index locked for write: " + m_lock);
        }

        // only after gettting a lock, create the initial index files
        // if it doesn't exist.
        if (!DirectoryReader.indexExists(m_directory)) {
            IndexWriterConfig conf = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
            boolean initSuccess = false;
            IndexWriter writer = null;
            try {
                writer = new IndexWriter(m_directory, conf);
                initSuccess = true;
            } catch (IndexFormatTooOldException ie) {
                // delete too old index
                File[] files = m_indexDir.listFiles();
                if (files != null && files.length > 0) {
                    for (int i = 0; i < files.length; i++) {
                        File oneFile = files[i];
                        if (!LuceneIndexWriter.LOCK_NAME.equals(oneFile.getName())) {
                            oneFile.delete();
                        }
                    }
                }

                writer = new IndexWriter(m_directory, conf);
                initSuccess = true;
            } finally {
                if (!initSuccess) {
                    m_lock.release();
                }
                IOUtils.closeWhileHandlingException(writer);
            }
        }
    }

    /**
     * Removes all index for a Tm. This method removes all index files
     * and directories that belong to the specified Tm. This method
     * tries to remove Tm in a destructive fashion, meaning that, even
     * if the locks on the indexes have been obtained by other
     * process/thread, this method goes ahead and delete indexes. The
     * other process/thread with locks will get exceptions.
     *
     * @param p_tmId Tm id
     */
    static public void removeTm(long p_tmId) throws Exception {
        File tmIndexRoot = LuceneUtil.getGoldTmIndexParentDir(p_tmId);

        String[] localeNames = tmIndexRoot.list();

        // localeNames can be null if no index has been created.
        if (localeNames != null) {
            for (int i = 0; i < localeNames.length; i++) {
                File directory = new File(tmIndexRoot, localeNames[i]);

                try {
                    if (!directory.isDirectory()) {
                        FileUtil.deleteFile(directory);
                        continue;
                    }

                    // delete all index files and Lucene lock files
                    FSDirectory fsDirectory = FSDirectory.open(directory);

                    // delete our lock file
                    Lock lock = fsDirectory.makeLock(LOCK_NAME);
                    lock.release();

                    fsDirectory.close();

                    // delete index directory 
                    FileUtil.deleteFile(directory);

                    // clean cache if have
                    LuceneCache.cleanLuceneCache(directory);
                } catch (Exception e) {
                    c_logger.error("Exception is thrown while removing a Tm index. Ignoring...", e);
                }
            }
        }

        // delete index root directory
        tmIndexRoot.delete();
    }

    /**
     * Indexes segments. To maintain index integrity, indexes are at
     * first created in memory and merged into a file system index.
     *
     * @param p_tuvs List of BaseTmTuv, SegmentsForSave.AddTuv, or TM3Tuv
     * @param p_sourceLocale true if p_tuvs are source locale segments
     * @param p_indexTargetLocales true for TM3, false for TM2
     */
    public void index(List p_tuvs, boolean p_sourceLocale, boolean p_indexTargetLocales) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
        conf.setOpenMode(m_isFirst ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
        IndexWriter fsIndexWriter = new IndexWriter(m_directory, conf);

        try {
            for (Iterator it = p_tuvs.iterator(); it.hasNext();) {
                Object tuv = it.next();

                Document doc = tuv instanceof BaseTmTuv
                        ? createDocumentFromBaseTmTuv((BaseTmTuv) tuv, p_sourceLocale, p_indexTargetLocales)
                        : tuv instanceof AddTuv
                                ? createDocumentFromAddTuv((AddTuv) tuv, p_sourceLocale, p_indexTargetLocales)
                                : tuv instanceof TM3Tuv
                                        ? createDocumentFromTM3Tuv((TM3Tuv<GSTuvData>) tuv, p_sourceLocale,
                                                p_indexTargetLocales)
                                        : null;

                fsIndexWriter.addDocument(doc);
            }
        } finally {
            fsIndexWriter.close();
        }

        // clean cache if have
        LuceneCache.cleanLuceneCache(m_indexDir);
    }

    /*
     * Removes indexes for the specified segments
     *
     * @param p_tuvs List of BaseTmTuv or TM3Tuv
     */
    public void remove(Collection p_tuvs) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(m_directory, conf);

        try {
            for (Iterator it = p_tuvs.iterator(); it.hasNext();) {
                Object tuv = it.next();
                Long id = tuv instanceof BaseTmTuv ? ((BaseTmTuv) tuv).getId()
                        : tuv instanceof TM3Tuv ? ((TM3Tuv) tuv).getId() : null;

                Term term = new Term(TuvDocument.TUV_ID_FIELD, id.toString());
                writer.deleteDocuments(term);
            }
        } catch (Throwable e) {
            c_logger.error(e.getMessage(), e);
            //indexReader.undeleteAll();
            throw (e instanceof Exception ? (Exception) e : new Exception(e));
        } finally {
            writer.commit();
            writer.close();
        }

        // clean cache if have
        LuceneCache.cleanLuceneCache(m_indexDir);
    }

    public void close() throws Exception {
        m_lock.release();
        m_lock = null;

        m_directory.close();
    }

    /** Release the write lock, if needed. */
    protected void finalize() throws IOException {
        if (m_lock != null) {
            m_lock.release();
        }
        m_lock = null;
    }

    private Document createDocumentFromBaseTmTuv(BaseTmTuv p_tuv, boolean p_sourceLocale,
            boolean p_indexTargetLocales) throws Exception {
        Set<String> targetLocales = new HashSet<String>();
        if (p_indexTargetLocales) {
            for (BaseTmTuv tuv : p_tuv.getTu().getTuvs()) {
                if (tuv.equals(p_tuv)) {
                    continue;
                }
                targetLocales.add(tuv.getLocale().toString());
            }
        }
        TuvDocument tuvDoc = new TuvDocument(p_tuv.getFuzzyIndexFormat(), p_tuv.getId(), p_tuv.getTu().getId(),
                m_tmId, p_sourceLocale, targetLocales, m_analyzer);
        return tuvDoc.getDocument();
    }

    private Document createDocumentFromAddTuv(AddTuv p_tuv, boolean p_sourceLocale, boolean p_indexTargetLocales)
            throws Exception {
        if (p_indexTargetLocales) {
            throw new IllegalArgumentException("should never index target locales for TM3");
        }
        TuvDocument tuvDoc = new TuvDocument(p_tuv.getTuv().getFuzzyIndexFormat(), p_tuv.getNewTuvId(),
                p_tuv.getTuIdToAdd(), m_tmId, p_sourceLocale, null, m_analyzer);
        return tuvDoc.getDocument();
    }

    private Document createDocumentFromTM3Tuv(TM3Tuv<GSTuvData> p_tuv, boolean p_sourceLocale,
            boolean p_indexTargetLocales) throws Exception {
        if (!p_indexTargetLocales) {
            throw new IllegalArgumentException("should always index target locales for TM3");
        }
        Set<String> targetLocales = new HashSet<String>();
        for (TM3Tuv<GSTuvData> tuv : p_tuv.getTu().getAllTuv()) {
            if (tuv.equals(p_tuv)) {
                continue;
            }
            targetLocales.add(tuv.getLocale().toString());
        }
        TuvDocument tuvDoc = new TuvDocument(p_tuv.getContent().normalizeTuvData(), p_tuv.getId(),
                p_tuv.getTu().getId(), m_tmId, p_sourceLocale, targetLocales, m_analyzer);
        return tuvDoc.getDocument();
    }

}