edu.isistan.carcha.CarchaPipeline.java Source code

Java tutorial

Introduction

Here is the source code for edu.isistan.carcha.CarchaPipeline.java

Source

/**
 * Copyright 2015 UNICEN. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.isistan.carcha;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.InvalidXMLException;
import org.cleartk.stanford.StanfordCoreNLPAnnotator;
import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
import org.cleartk.token.type.Sentence;
import org.cleartk.util.ae.UriToDocumentTextAnnotator;
import org.cleartk.util.ae.UriToXmiCasAnnotator;
import org.cleartk.util.ae.linewriter.LineWriter;
import org.cleartk.util.ae.linewriter.annotation.CoveredTextAnnotationWriter;
import org.cleartk.util.cr.UriCollectionReader;
import org.springframework.util.StopWatch;

import com.google.common.base.Stopwatch;

import edu.isistan.carcha.annotator.AnnotationRemover;
import edu.isistan.carcha.annotator.DesignDecisionSentenceRemover;
import edu.isistan.carcha.writer.DesignDecisionWriter;
import edu.isistan.carcha.writer.XCasWriter;

/**
 * <br>
 * Copyright (c) 2013, Regents of the UNICEN <br>
 * All rights reserved.
 * 
 * @author German Attanasio Ruiz
 * 
 */
public class CarchaPipeline {

    /** The Constant UIMA_RUTA_SCRIPT. */
    private static final String UIMA_RUTA_SCRIPT = "edu.isistan.carcha.concern.cdetectorEngine";

    /** The Constant logger. */
    private static final Log logger = LogFactory.getLog(CarchaPipeline.class);

    /** The stanford nlp. */
    private AnalysisEngineDescription stanfordNLP;

    /**
     * Instantiates a new carcha pipeline.
     */
    public CarchaPipeline() {
        try {
            this.stanfordNLP = StanfordCoreNLPAnnotator.getDescription();
        } catch (ResourceInitializationException e) {
            logger.error("Error loading the standford annotators", e);
        }
    }

    /**
     * The main method.
     *
     * @param args the arguments
     * @throws Exception the exception
     */
    public static void main(String[] args) throws Exception {
        String inputDirectory = args[0];
        String output = args[1];

        CarchaPipeline carcha = new CarchaPipeline();
        StopWatch sw = new StopWatch();
        sw.start("executeStanfordAnnotators");
        if (args.length > 2 && args[2].equals("write")) {
            logger.info("Write Design Decision to file");
            carcha.writeAnnotations(inputDirectory, output);
        } else if (args.length > 2 && args[2].equals("sentence"))
            carcha.writeSentences(inputDirectory, output);
        else if (args.length > 2 && args[2].equals("sentence-annotator"))
            carcha.executeSentenceAnnotator(inputDirectory, output);
        else
            carcha.executeUIMAAnnotator(inputDirectory, output);
        sw.stop();
        logger.info(sw.prettyPrint());
    }

    /**
     * Write the concern annotations into @param outputFilename.
     *
     * @param inputDirectory the input directory
     * @param outputDirectory the output directory
     * @throws ResourceInitializationException the resource initialization exception
     * @throws UIMAException the uIMA exception
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void writeAnnotations(String inputDirectory, String outputDirectory)
            throws ResourceInitializationException, UIMAException, IOException {
        File filesDirectory = new File(inputDirectory);
        SimplePipeline.runPipeline(UriCollectionReader.getDescriptionFromDirectory(filesDirectory),
                UriToXmiCasAnnotator.getDescription(),
                DesignDecisionWriter.getDesignDecisionWriterDescription(outputDirectory));
    }

    /**
     * Write the concern annotations into @param outputFilename.
     *
     * @param inputDirectory the input directory
     * @param outputFilename the output filename
     * @throws ResourceInitializationException the resource initialization exception
     * @throws UIMAException the uIMA exception
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void writeSentences(String inputDirectory, String outputFilename)
            throws ResourceInitializationException, UIMAException, IOException {
        File filesDirectory = new File(inputDirectory);
        SimplePipeline.runPipeline(UriCollectionReader.getDescriptionFromDirectory(filesDirectory),
                UriToXmiCasAnnotator.getDescription(),

                AnalysisEngineFactory.createEngineDescription(LineWriter.class, LineWriter.PARAM_OUTPUT_FILE_NAME,
                        outputFilename, LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME, Sentence.class.getName(),
                        LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
                        CoveredTextAnnotationWriter.class.getName()));
    }

    /**
     * Creates the ae description.
     *
     * @param script the script
     * @return the analysis engine description
     * @throws InvalidXMLException the invalid xml exception
     * @throws IOException Signals that an I/O exception has occurred.
     * @throws ResourceInitializationException the resource initialization exception
     */
    private AnalysisEngineDescription createAEDescription(String script)
            throws InvalidXMLException, IOException, ResourceInitializationException {
        final AnalysisEngineDescription ruta = AnalysisEngineFactory.createEngineDescription(script);
        return ruta;
    }

    /**
     * Execute Stanford annotators.
     * The annotators are: Tokenize, SSplit, POS, Lemma, NER, Parse, Dcoref + Snowball Stemmer
     *
     * @param _inputFile the _input file
     * @param _outputFile the _output file
     */
    public void executeUIMAAnnotator(String _inputFile, String _outputFile) {
        logger.info("Running executeUIMAAnnotator with: " + _inputFile + " and " + _outputFile);
        File file = new File(_inputFile);
        Stopwatch stopwatch = new Stopwatch().start();
        try {
            SimplePipeline.runPipeline(UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(file)),
                    UriToDocumentTextAnnotator.getDescription(), this.stanfordNLP, //stanford tokenize, ssplit, pos, lemma, ner, parse, dcoref
                    DefaultSnowballStemmer.getDescription("English"), //stemmer
                    createAEDescription(UIMA_RUTA_SCRIPT), //RUTA Analysis Engine
                    AnnotationRemover.getDescription(), //Remove useless annotations
                    DesignDecisionSentenceRemover.getDescription(), //Remove sentence annotations that are also designdecisions
                    AnalysisEngineFactory.createEngineDescription(//result files
                            XCasWriter.class, XCasWriter.PARAM_OUTPUT_FILE_NAME, _outputFile));
        } catch (Exception e) {
            logger.error("Error executing the uima annotator.", e);
        }
        stopwatch.stop(); // optional
        logger.info("executeUIMAAnnotator took: " + stopwatch); // formatted string like "12.3 ms"
    }

    /**
     * Execute sentence annotator.
     *
     * @param _inputFile the _input file
     * @param _outputFile the _output file
     */
    public void executeSentenceAnnotator(String _inputFile, String _outputFile) {
        logger.info("Running executeSentenceAnnotator with: " + _inputFile + " and " + _outputFile);
        File file = new File(_inputFile);
        Stopwatch stopwatch = new Stopwatch().start();
        try {
            SimplePipeline.runPipeline(UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(file)),
                    UriToDocumentTextAnnotator.getDescription(), this.stanfordNLP, //stanford tokenize, ssplit, pos, lemma, ner, parse, dcoref
                    AnnotationRemover.getDescription(), //Remove useless annotations
                    AnalysisEngineFactory.createEngineDescription(//result files
                            XCasWriter.class, XCasWriter.PARAM_OUTPUT_FILE_NAME, _outputFile));
        } catch (Exception e) {
            logger.error("Error executing the uima annotator.", e);
        }
        stopwatch.stop(); // optional
        logger.info("executeSentenceAnnotator took: " + stopwatch); // formatted string like "12.3 ms"
    }

}