eu.crydee.alignment.aligner.BritannicaP.java Source code

Java tutorial

Introduction

Here is the source code for eu.crydee.alignment.aligner.BritannicaP.java

Source

/*
 * Copyright 2014 Hugo "m09" Mougard.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.crydee.alignment.aligner;

import eu.crydee.alignment.aligner.ae.AlignBestAE;
import eu.crydee.alignment.aligner.ae.CosineSimilarityAE;
import eu.crydee.alignment.aligner.ae.EvaluatorC;
import eu.crydee.alignment.aligner.ae.HtmlOutputC;
import eu.crydee.alignment.aligner.ae.IsfAE;
import eu.crydee.alignment.aligner.ae.TAFC;
import eu.crydee.alignment.aligner.ae.StopWordsAE;
import eu.crydee.alignment.aligner.ae.XmiSerializerC;
import eu.crydee.alignment.aligner.cr.BritannicaCR;
import eu.crydee.alignment.aligner.resources.ConfusionMatrixResImpl;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.AggregateBuilder;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.ExternalResourceDescription;
import org.apache.uima.resource.ResourceInitializationException;

/**
 *
 * @author Hugo m09? Mougard
 */
public class BritannicaP {

    private static final Logger logger = LogManager.getLogger(BritannicaP.class);

    private static String CORPUS, ANNS;

    public static void main(String[] args) throws ResourceInitializationException, UIMAException, IOException {
        parseArguments(args);
        ExternalResourceDescription matrixD = createExternalResourceDescription(ConfusionMatrixResImpl.class, "");
        CollectionReader cr = createReader(BritannicaCR.class, BritannicaCR.PARAM_BRITANNICA_CORPUS_PATH, CORPUS,
                BritannicaCR.PARAM_BRITANNICA_ANNOTATIONS_PATH, ANNS, BritannicaCR.PARAM_VIEW_NAME_ELEMENTARY,
                Config.elementaryView, BritannicaCR.PARAM_VIEW_NAME_NORMAL, Config.regularView);
        AnalysisEngineDescription stopWords = createEngineDescription(StopWordsAE.class);
        AnalysisEngineDescription isf = createEngineDescription(IsfAE.class, IsfAE.PARAM_VIEW_ELEMENTARY,
                Config.elementaryView, IsfAE.PARAM_VIEW_REGULAR, Config.regularView);
        AnalysisEngineDescription cosineSimilarity = createEngineDescription(CosineSimilarityAE.class,
                CosineSimilarityAE.PARAM_VIEW_LEFT, Config.elementaryView, CosineSimilarityAE.PARAM_VIEW_RIGHT,
                Config.regularView);
        AnalysisEngineDescription aligner = createEngineDescription(AlignBestAE.class, AlignBestAE.PARAM_VIEW_LEFT,
                Config.elementaryView, AlignBestAE.PARAM_VIEW_RIGHT, Config.regularView);
        AnalysisEngineDescription evaluator = createEngineDescription(EvaluatorC.class,
                EvaluatorC.PARAM_VIEW_ELEMENTARY, Config.elementaryView, EvaluatorC.PARAM_VIEW_REGULAR,
                Config.regularView, EvaluatorC.RES_CONFUSION_MATRIX, matrixD);
        AnalysisEngineDescription casWriter = createEngineDescription(XmiSerializerC.class,
                XmiSerializerC.PARAM_OUT_FOLDER, "out/cas");
        AnalysisEngineDescription htmlWriter = createEngineDescription(HtmlOutputC.class,
                HtmlOutputC.PARAM_OUT_FOLDER, "out/html", HtmlOutputC.PARAM_VIEW_ELEMENTARY, Config.elementaryView,
                HtmlOutputC.PARAM_VIEW_REGULAR, Config.regularView);
        AnalysisEngineDescription tafWriter = createEngineDescription(TAFC.class, TAFC.PARAM_OUTPUT_FOLDER,
                "out/taf", TAFC.PARAM_VIEW_LEFT, Config.elementaryView, TAFC.PARAM_VIEW_RIGHT, Config.regularView);
        AggregateBuilder b = new AggregateBuilder();
        b.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.elementaryView);
        b.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.regularView);
        b.add(isf);
        b.add(cosineSimilarity);
        b.add(aligner);
        b.add(evaluator);
        b.add(casWriter);
        b.add(tafWriter);
        b.add(htmlWriter);

        AnalysisEngine completeAe = b.createAggregate();

        SimplePipeline.runPipeline(cr, completeAe);
    }

    static private void parseArguments(String[] args) {
        Options shortCircuitOptions = new Options();
        shortCircuitOptions
                .addOption(OptionBuilder.withLongOpt("help").withDescription("Print this message.").create('h'));
        shortCircuitOptions
                .addOption(OptionBuilder.withLongOpt("version").withDescription("Print the version.").create('v'));
        Options options = new Options();
        options.addOption(OptionBuilder.isRequired().withLongOpt("corpus-path").hasArg().withArgName("folder-path")
                .withDescription("Path to the Britannica corpus.").create('c'));
        options.addOption(
                OptionBuilder.isRequired().withLongOpt("annotations-path").hasArg().withArgName("folder-path")
                        .withDescription("Path to the test folder of the annotations.").create('a'));
        try {
            CommandLineParser parser = new PosixParser();
            CommandLine cmd = parser.parse(shortCircuitOptions, args, true);
            if (cmd.hasOption('h')) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("britannica", options, true);
                System.exit(0);
            }
            if (cmd.hasOption('v')) {
                System.out.println("britannica-aligner v1.0.0-SNAPSHOT");
                System.exit(0);
            }
            cmd = parser.parse(options, args);
            CORPUS = cmd.getOptionValue('c');
            ANNS = cmd.getOptionValue('a');
        } catch (ParseException ex) {
            System.err.println("The CLI args could not be parsed.");
            System.err.println("The error message was:");
            System.err.println(" " + ex.getMessage());
            System.exit(1);
        }
    }
}