org.apache.ctakes.temporal.data.analysis.EventPrinterPipeline.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.ctakes.temporal.data.analysis.EventPrinterPipeline.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.temporal.data.analysis;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.ctakes.relationextractor.eval.XMIReader;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Ordering;
import com.lexicalscope.jewel.cli.CliFactory;
import com.lexicalscope.jewel.cli.Option;

/**
 * Prints the events of a given UMLS semantic type.
 * 
 * @author dmitriy dligach
 */
public class EventPrinterPipeline {

    /**
     * Command-line options of this pipeline; an instance is obtained in
     * {@code main} by handing this interface to {@code CliFactory.parseArguments}.
     */
    interface Options {

        /**
         * @return the directory whose files are fed to the collection reader
         */
        @Option(description = "specify the path to the directory containing the xmi files")
        File getInputDirectory();

        /**
         * @return the numeric UMLS semantic type that printed events must have
         */
        @Option(description = "specify the UMLS semantic type (e.g. 5, i.e. procedure)")
        int getUmlsSemanticType();

        /**
         * @return the file the matching events are written to
         */
        @Option(description = "specify the path to the output file")
        File getEventOutputFile();
    }

    /**
     * Command-line entry point: reads every non-hidden file of the input
     * directory through a collection reader and runs {@link EventWriter}
     * over each document.
     *
     * @param args command-line arguments, parsed into {@link Options}
     * @throws IllegalArgumentException if the input directory cannot be listed
     * @throws Exception if option parsing, component creation or the pipeline run fails
     */
    public static void main(String[] args) throws Exception {

        Options options = CliFactory.parseArguments(Options.class, args);

        // File.listFiles() returns null (not an empty array) when the path is
        // not a directory or cannot be read; fail with a clear message instead
        // of a NullPointerException inside Arrays.asList.
        File inputDirectory = options.getInputDirectory();
        File[] directoryEntries = inputDirectory.listFiles();
        if (directoryEntries == null) {
            throw new IllegalArgumentException(
                    "input directory does not exist or cannot be read: " + inputDirectory);
        }

        List<File> xmiFiles = Arrays.asList(directoryEntries);
        CollectionReader collectionReader = getCollectionReader(xmiFiles);

        AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(EventWriter.class,
                "UmlsSemanticType", options.getUmlsSemanticType(), "EventOutputFile", options.getEventOutputFile());

        SimplePipeline.runPipeline(collectionReader, annotationConsumer);
    }

    /**
     * Print events with a given UMLS semantic type.
     *
     * For every gold-view event mention, the system-view event mentions lying
     * inside its span are inspected; each one whose type id equals the
     * configured semantic type produces one output line of the form
     * {@code <system event text>|<gold event expanded to its NP>}, lower-cased.
     */
    public static class EventWriter extends JCasAnnotator_ImplBase {

        @ConfigurationParameter(name = "EventOutputFile", mandatory = true, description = "path to the output file that will store the events")
        private String eventOutputFile;

        @ConfigurationParameter(name = "UmlsSemanticType", mandatory = true, description = "umls semantic type of interest")
        private int umlsSemanticType;

        /**
         * Appends the matching events of one document to the output file.
         *
         * @param jCas the document; its "GoldView" and "_InitialView" views are read
         * @throws AnalysisEngineProcessException if a view is missing, or the
         *         output file cannot be opened, written or closed
         */
        @Override
        public void process(JCas jCas) throws AnalysisEngineProcessException {

            JCas goldView;
            JCas systemView;
            try {
                goldView = jCas.getView("GoldView");
                systemView = jCas.getView("_InitialView");
            } catch (CASException e) {
                throw new AnalysisEngineProcessException(e);
            }

            // The file is opened in append mode, so output accumulates across documents.
            BufferedWriter eventWriter = getWriter(eventOutputFile, true);
            if (eventWriter == null) {
                // getWriter reports failure by returning null; surface it here
                // rather than failing later with a NullPointerException.
                throw new AnalysisEngineProcessException(
                        new IOException("unable to open event output file: " + eventOutputFile));
            }

            boolean allWritten = false;
            try {
                for (EventMention goldEventMention : JCasUtil.select(goldView, EventMention.class)) {
                    // system events located within the span of the gold event
                    List<EventMention> coveredSystemEventMentions = JCasUtil.selectCovered(systemView,
                            EventMention.class, goldEventMention.getBegin(), goldEventMention.getEnd());

                    for (EventMention systemEventMention : coveredSystemEventMentions) {
                        if (systemEventMention.getTypeID() == umlsSemanticType) {
                            String output = String.format("%s|%s\n",
                                    systemEventMention.getCoveredText().toLowerCase(),
                                    expandToNP(systemView, goldEventMention).toLowerCase());
                            eventWriter.write(output);
                        }
                    }
                }
                allWritten = true;
            } catch (IOException e) {
                throw new AnalysisEngineProcessException(e);
            } finally {
                try {
                    eventWriter.close();
                } catch (IOException e) {
                    // A failed close can lose buffered output, so report it,
                    // but never let it replace a failure that is already propagating.
                    if (allWritten) {
                        throw new AnalysisEngineProcessException(e);
                    }
                }
            }
        }
    }

    /**
     * Opens a buffered character writer on the given file.
     *
     * @param filePath path of the file to write to
     * @param append   {@code true} to add to the end of an existing file,
     *                 {@code false} to overwrite it
     * @return the writer, or {@code null} if the file could not be opened
     *         (the stack trace of the failure is printed); callers must
     *         check for {@code null}
     */
    public static BufferedWriter getWriter(String filePath, boolean append) {

        try {
            return new BufferedWriter(new FileWriter(filePath, append));
        } catch (IOException openFailure) {
            openFailure.printStackTrace();
            return null;
        }
    }

    /**
     * Creates an {@link XMIReader} over the given files, leaving out hidden ones.
     *
     * @param inputFiles candidate input files
     * @return a collection reader configured with the paths of all non-hidden files
     * @throws Exception if the reader cannot be created
     */
    public static CollectionReader getCollectionReader(List<File> inputFiles) throws Exception {

        List<String> visiblePaths = new ArrayList<String>();
        for (File candidate : inputFiles) {
            if (candidate.isHidden()) {
                continue;
            }
            visiblePaths.add(candidate.getPath());
        }

        String[] paths = visiblePaths.toArray(new String[visiblePaths.size()]);

        return CollectionReaderFactory.createReader(XMIReader.class, XMIReader.PARAM_FILES, paths);
    }

    /**
     * Expands an annotation to the text of the shortest noun phrase covering it.
     *
     * @param jCas                 the view whose treebank nodes are searched
     * @param identifiedAnnotation the annotation to expand
     * @return the covered text of the shortest covering treebank node of type
     *         "NP"; the annotation's own covered text if no such node exists,
     *         or if a covering terminal node has a type not starting with "N"
     */
    public static String expandToNP(JCas jCas, IdentifiedAnnotation identifiedAnnotation) {

        // Track the minimum directly instead of sorting all candidates; on
        // equal lengths the first candidate in iteration order is kept.
        String shortestNPText = null;

        for (TreebankNode treebankNode : JCasUtil.selectCovering(jCas, TreebankNode.class,
                identifiedAnnotation.getBegin(), identifiedAnnotation.getEnd())) {

            // a node without a type can be neither a noun terminal nor an NP
            String nodeType = treebankNode.getNodeType();
            if (nodeType == null) {
                continue;
            }

            // only expand nouns (and not verbs or adjectives)
            if (treebankNode instanceof TerminalTreebankNode && !nodeType.startsWith("N")) {
                return identifiedAnnotation.getCoveredText();
            }

            // because only nouns are expanded, look for covering NPs
            if (nodeType.equals("NP")) {
                String candidateText = treebankNode.getCoveredText();
                if (shortestNPText == null || candidateText.length() < shortestNPText.length()) {
                    shortestNPText = candidateText;
                }
            }
        }

        if (shortestNPText != null) {
            return shortestNPText;
        }

        return identifiedAnnotation.getCoveredText();
    }
}