de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty.SegEvalEvaluator.java Source code

Introduction

Here is the source code for de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty.SegEvalEvaluator.java
Source

/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty;

import de.tudarmstadt.ukp.experiments.argumentation.sequence.annotator.SequenceAnnotatorFromTokenLevelPredictions;
import de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.JCasIOHelper;
import de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty.jython.JythonInterpreter;
import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentComponent;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * Compares argument component segmentations of two annotated documents (or two folders of
 * documents) by delegating the actual metric computation to a {@link JythonInterpreter}.
 *
 * @author Ivan Habernal
 */
public class SegEvalEvaluator {

    private final JythonInterpreter interpreter;

    public SegEvalEvaluator() {
        interpreter = new JythonInterpreter();
    }

    /**
     * Extracts the segments of the given argument component type from both documents and
     * returns the value computed by {@link JythonInterpreter#boundarySimilarity} for them.
     *
     * @param gold                   gold document
     * @param predicted              predicted document
     * @param argumentComponentClass which argument component should be considered
     * @return boundary similarity of the two segmentations
     */
    public double evaluate(JCas gold, JCas predicted, Class<ArgumentComponent> argumentComponentClass) {
        List<Integer> goldSegments = SegmentExtractor.extractSegments(gold, argumentComponentClass);
        List<Integer> predictedSegments = SegmentExtractor.extractSegments(predicted, argumentComponentClass);

        return interpreter.boundarySimilarity(goldSegments, predictedSegments);
    }

    /**
     * Takes each file from the {@code predictionsFolder} and evaluate it against its counterpart
     * from the {@code goldFolder} using boundary similarity metric.
     *
     * @param goldFolder             gold data
     * @param predictionsFolder      predictions (or any other data, ie. from annotators)
     * @param argumentComponentClass which argument component should be considered
     * @return statistics holding one boundary similarity value per evaluated file
     * @throws IOException if {@code predictionsFolder} cannot be listed or a prediction file has
     *                     no counterpart with the same name in {@code goldFolder}
     * @throws Exception   if loading a document fails
     */
    public DescriptiveStatistics evaluateFolders(File goldFolder, File predictionsFolder,
            Class<ArgumentComponent> argumentComponentClass) throws Exception {
        DescriptiveStatistics statistics = new DescriptiveStatistics();

        File[] files = predictionsFolder.listFiles(JCasIOHelper.XMI_FILTER);

        // File.listFiles returns null (rather than an empty array) when the path is not a
        // directory or an I/O error occurs; fail with a descriptive message instead of an NPE
        if (files == null) {
            throw new IOException("Cannot list files in predictions folder " + predictionsFolder
                    + " (not a directory or not readable)");
        }

        for (File predicted : files) {
            // the gold counterpart is looked up by file name
            File gold = new File(goldFolder, predicted.getName());

            if (!gold.exists()) {
                throw new IOException("Gold file " + gold + " does not exist!");
            }

            JCas goldJCas = JCasIOHelper.loadJCasFromFile(gold);
            JCas predictedJCas = JCasIOHelper.loadJCasFromFile(predicted);

            double boundarySimilarity = evaluate(goldJCas, predictedJCas, argumentComponentClass);

            statistics.addValue(boundarySimilarity);
        }

        return statistics;
    }

    /**
     * Loads all data from predicted CSV, annotates a copy of gold data with predicted argument
     * components, and stores the data to the output folder
     *
     * @param goldDataPath   gold data (all {@code *.xmi} files in this location are read)
     * @param predictionsCSV predicted csv
     * @param outputPath     output path
     * @throws Exception if the pipeline cannot be created or fails while running
     */
    public static void annotateDataWithPredictions(String goldDataPath, File predictionsCSV, File outputPath)
            throws Exception {
        SimplePipeline.runPipeline(
                CollectionReaderFactory.createReaderDescription(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION,
                        goldDataPath, XmiReader.PARAM_PATTERNS, XmiReader.INCLUDE_PREFIX + "*.xmi"),
                AnalysisEngineFactory.createEngineDescription(SequenceAnnotatorFromTokenLevelPredictions.class,
                        SequenceAnnotatorFromTokenLevelPredictions.PARAM_TOKEN_LEVEL_PREDICTIONS_CSV_FILE,
                        predictionsCSV),
                AnalysisEngineFactory.createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION,
                        outputPath));
    }

    /**
     * Location of the gold data used by {@link #segmentEvaluationThreeAnnotators()}.
     * NOTE: still a placeholder value; it must point to a real folder before that method can
     * succeed.
     */
    public static final String goldDataPath = "TBD";

    /**
     * Computes upper bounds by computing segment boundaries on three annotators. For each
     * annotator folder, prints mean and standard deviation of the boundary similarity against
     * the gold data on its own line.
     *
     * @throws Exception if evaluating any of the annotator folders fails
     */
    public static void segmentEvaluationThreeAnnotators() throws Exception {
        File parentFolder = new File(
                "/usr/local/data/argumentation/all-annotations-phase2-exported/new-typesystem");
        for (String folderName : Arrays.asList("phase2-final-annotator1", "phase2-final-annotator2",
                "phase2-final-annotator3")) {
            File annotatorFolder = new File(parentFolder, folderName);

            SegEvalEvaluator evaluator = new SegEvalEvaluator();
            DescriptiveStatistics statistics = evaluator.evaluateFolders(new File(goldDataPath), annotatorFolder,
                    ArgumentComponent.class);

            // terminate each result with a line break so the three results do not run together
            System.out.printf("%.3f +- %.3f%n", statistics.getMean(), statistics.getStandardDeviation());
        }
    }

    /**
     * Runs the three-annotator segment evaluation.
     *
     * @param args ignored
     * @throws Exception if the evaluation fails
     */
    public static void main(String[] args) throws Exception {
        segmentEvaluationThreeAnnotators();
        //        evaluatePredictions();

        //        evaluatePredictionsFolders(new File("/usr/local/data/argumentation/metacentrum-results-all-runs-exported-annotated/"));
    }

}