com.aliasi.lingmed.omim.IndexOmim.java Source code

Java tutorial

Introduction

Here is the source code for com.aliasi.lingmed.omim.IndexOmim.java

Source

/*
 * LingPipe v. 2.0
 * Copyright (C) 2003-5 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://www.alias-i.com/lingpipe/licenseV1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.lingmed.omim;

import com.aliasi.corpus.Handler;
import com.aliasi.corpus.ObjectHandler;
import com.aliasi.corpus.Parser;

import com.aliasi.lingmed.dao.DaoException;
import com.aliasi.lingmed.utils.FileUtils;
import com.aliasi.lingmed.utils.Logging;

import com.aliasi.util.AbstractCommand;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import java.util.Properties;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

import org.xml.sax.InputSource;

import org.apache.log4j.Logger;

/** 
 * <P>The <code>IndexOmim</code> command processes a
 * file of OMIM records and adds
 * them to a Lucene Index on the local filesystem.
 * 
 * <P>The following arguments are required:
 *
 * <dl>
 * <dt><code>-index</code></dt>
 * <dd>Path to the Lucene index file.
 * </dd>
 *
 * <dt><code>-distFile</code></dt>
 * <dd>Path to the OMIM distribution text file.
 * </dd>
 * </dl>
 *
 *
 * @author Mitzi Morris
 * @version 1.0
 * @since   LingMed1.0
 */

public class IndexOmim extends AbstractCommand {
    /** Logger shared by the command and its nested record handler. */
    private static final Logger LOGGER = Logger.getLogger(IndexOmim.class);

    /** Name of the command-line argument holding the OMIM distribution file path. */
    private final static String DIST_FILE = "distFile";
    /** Name of the command-line argument holding the Lucene index path. */
    private final static String LUCENE_INDEX = "index";
    /** Default command parameters; empty, so both arguments must be supplied. */
    private final static Properties DEFAULT_PARAMS = new Properties();

    private File mDistFile;
    private String mDistFileName;
    private File mIndex;
    private String mIndexName;

    /** Codec that supplies the analyzer handed to the index writer. */
    private final OmimCodec mCodec = new OmimCodec();

    /**
     * Builds the command from its command-line arguments, logs the
     * parameters, and then resolves the index and distribution file
     * through {@code FileUtils}.
     *
     * @param args command-line arguments; must define <code>-index</code>
     *             and <code>-distFile</code>
     * @throws Exception if argument lookup or the file checks fail
     */
    private IndexOmim(String[] args) throws Exception {
        super(args, DEFAULT_PARAMS);
        mIndexName = getExistingArgument(LUCENE_INDEX);
        mDistFileName = getExistingArgument(DIST_FILE);
        reportParameters();
        mIndex = FileUtils.checkIndex(mIndexName, true);
        mDistFile = FileUtils.checkInputFile(mDistFileName);
    }

    /** Logs the index name and distribution file name at info level. */
    private void reportParameters() {
        LOGGER.info("Indexing OMIM " + "\n\tIndex=" + mIndexName + "\n\tOmim distribution=" + mDistFileName);
    }

    /**
     * Parses the OMIM distribution file and adds its records to the
     * Lucene index, then optimizes and closes the index.
     *
     * <P>The distribution file reader is always closed, and the index
     * writer is closed even when parsing or optimizing fails.
     *
     * @throws IllegalStateException if any step fails; the original
     *         exception is attached as the cause
     */
    public void run() {
        LOGGER.info("Begin indexing");
        IndexWriter indexWriter = null;
        FileReader reader = null;
        // Set once OmimIndexer.close() has completed, so that the finally
        // block does not close the writer a second time on success.
        boolean indexClosed = false;
        try {
            indexWriter = new IndexWriter(mIndex, mCodec.getAnalyzer());
            OmimIndexer indexer = new OmimIndexer(indexWriter);
            Parser<ObjectHandler<OmimRecord>> parser = new OmimParser(true);
            parser.setHandler(indexer);
            // NOTE(review): FileReader decodes with the platform default
            // charset; confirm that matches the OMIM distribution encoding.
            reader = new FileReader(mDistFile);
            InputSource inSource = new InputSource();
            inSource.setCharacterStream(reader);
            parser.parse(inSource);
            LOGGER.info("Parsed index, now optimize.");
            indexer.close();
            indexClosed = true;
            LOGGER.info("Processing complete.");
        } catch (Exception e) {
            LOGGER.warn("Unexpected Exception: " + e.getMessage());
            LOGGER.warn("stack trace: " + Logging.logStackTrace(e));
            // Chain the cause so the original exception type and trace survive.
            throw new IllegalStateException(e.getMessage(), e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ioe) {
                    LOGGER.warn("Exception closing OMIM distribution file: " + ioe);
                }
            }
            if (indexWriter != null && !indexClosed) {
                try {
                    indexWriter.close();
                } catch (IOException ioe) {
                    LOGGER.warn("Exception closing index writer: " + ioe);
                }
            }
        }
    }

    /**
     * Command-line entry point: constructs the command and runs it.
     *
     * @param args command-line arguments, see the class documentation
     * @throws Exception if the command cannot be constructed
     */
    public static void main(String[] args) throws Exception {
        IndexOmim indexer = new IndexOmim(args);
        indexer.run();
    }

    /**
     * Handler that converts each parsed {@link OmimRecord} to a Lucene
     * document and adds it to the wrapped index writer.
     */
    static class OmimIndexer implements ObjectHandler<OmimRecord> {
        final IndexWriter mIndexWriter;
        final OmimCodec mOmimCodec;

        /**
         * @param indexWriter writer that receives the converted documents
         */
        public OmimIndexer(IndexWriter indexWriter) {
            mIndexWriter = indexWriter;
            mOmimCodec = new OmimCodec();
        }

        /**
         * Adds the record to the index unless it is flagged as moved.
         * An I/O failure while adding is logged and the record is
         * skipped, so that the remaining records are still processed.
         *
         * @param rec the parsed OMIM record
         */
        public void handle(OmimRecord rec) {
            if (rec.isMoved()) {
                return;
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Adding OmimRecord, MimId=" + rec.getMimId() + "\ttitle=" + rec.getTitle());
                for (String alt : rec.getAltTitles()) {
                    LOGGER.debug("\talt title=" + alt);
                }
            }
            Document doc = mOmimCodec.toDocument(rec);
            try {
                mIndexWriter.addDocument(doc);
            } catch (IOException ioe) {
                // Best-effort: keep indexing, but log the stack trace too.
                LOGGER.warn("Exception indexing OMIM: " + ioe, ioe);
            }
        }

        /**
         * Optimizes the index and then closes the writer.
         *
         * @throws IOException if optimizing or closing the writer fails
         */
        public void close() throws IOException {
            mIndexWriter.optimize(); // merges segments
            mIndexWriter.close(); // commits to disk
        }
    }

}