Java tutorial
/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty;

import de.tudarmstadt.ukp.experiments.argumentation.sequence.annotator.SequenceAnnotatorFromTokenLevelPredictions;
import de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.JCasIOHelper;
import de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.thirdparty.jython.JythonInterpreter;
import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentComponent;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * Evaluates argument component segmentation by comparing the segments extracted from a gold
 * CAS with those extracted from a predicted CAS, using the boundary similarity computed by a
 * {@link JythonInterpreter}.
 *
 * @author Ivan Habernal
 */
public class SegEvalEvaluator
{
    /** Interpreter that computes the boundary similarity of two segmentations. */
    private final JythonInterpreter interpreter;

    /**
     * Creates an evaluator backed by a new {@link JythonInterpreter}.
     */
    public SegEvalEvaluator()
    {
        interpreter = new JythonInterpreter();
    }

    /**
     * Extracts the segments of the given argument component type from both CASes and returns
     * their boundary similarity.
     *
     * @param gold                   gold-standard document
     * @param predicted              document with predicted (or otherwise annotated) components
     * @param argumentComponentClass which argument component should be considered
     * @return boundary similarity as reported by the interpreter
     */
    public double evaluate(JCas gold, JCas predicted,
            Class<ArgumentComponent> argumentComponentClass)
    {
        List<Integer> goldSegments = SegmentExtractor.extractSegments(gold,
                argumentComponentClass);
        List<Integer> predictedSegments = SegmentExtractor.extractSegments(predicted,
                argumentComponentClass);

        return interpreter.boundarySimilarity(goldSegments, predictedSegments);
    }

    /**
     * Takes each file from the {@code predictionsFolder} and evaluates it against its
     * counterpart (the file with the same name) from the {@code goldFolder} using the boundary
     * similarity metric.
     *
     * @param goldFolder             gold data
     * @param predictionsFolder      predictions (or any other data, i.e. from annotators)
     * @param argumentComponentClass which argument component should be considered
     * @return statistics holding one boundary similarity value per evaluated file
     * @throws IOException if {@code predictionsFolder} cannot be listed or a gold counterpart
     *                     of a predicted file does not exist
     * @throws Exception   if loading a CAS from a file fails
     */
    public DescriptiveStatistics evaluateFolders(File goldFolder, File predictionsFolder,
            Class<ArgumentComponent> argumentComponentClass)
            throws Exception
    {
        DescriptiveStatistics statistics = new DescriptiveStatistics();

        File[] files = predictionsFolder.listFiles(JCasIOHelper.XMI_FILTER);

        // listFiles() returns null (rather than an empty array) when the path is not a
        // directory or cannot be read; fail with a descriptive error instead of an NPE
        if (files == null) {
            throw new IOException("Cannot list files in predictions folder " + predictionsFolder);
        }

        for (File predicted : files) {
            File gold = new File(goldFolder, predicted.getName());

            if (!gold.exists()) {
                throw new IOException("Gold file " + gold + " does not exist!");
            }

            JCas goldJCas = JCasIOHelper.loadJCasFromFile(gold);
            JCas predictedJCas = JCasIOHelper.loadJCasFromFile(predicted);

            double boundarySimilarity = evaluate(goldJCas, predictedJCas,
                    argumentComponentClass);
            statistics.addValue(boundarySimilarity);
        }

        return statistics;
    }

    /**
     * Loads all data from predicted CSV, annotates a copy of gold data with predicted argument
     * components, and stores the data to the output folder.
     *
     * @param goldDataPath   gold data; all {@code *.xmi} files at this location are read
     * @param predictionsCSV predicted csv with token-level predictions
     * @param outputPath     output path
     * @throws Exception if building or running the pipeline fails
     */
    public static void annotateDataWithPredictions(String goldDataPath, File predictionsCSV,
            File outputPath)
            throws Exception
    {
        SimplePipeline.runPipeline(
                // reads the gold XMI files
                CollectionReaderFactory.createReaderDescription(XmiReader.class,
                        XmiReader.PARAM_SOURCE_LOCATION, goldDataPath,
                        XmiReader.PARAM_PATTERNS, XmiReader.INCLUDE_PREFIX + "*.xmi"),
                // annotates them using the token-level predictions from the CSV file
                AnalysisEngineFactory.createEngineDescription(
                        SequenceAnnotatorFromTokenLevelPredictions.class,
                        SequenceAnnotatorFromTokenLevelPredictions.PARAM_TOKEN_LEVEL_PREDICTIONS_CSV_FILE,
                        predictionsCSV),
                // writes the annotated documents to the output location
                AnalysisEngineFactory.createEngineDescription(XmiWriter.class,
                        XmiWriter.PARAM_TARGET_LOCATION, outputPath));
    }

    /**
     * Path to the gold data used by {@link #segmentEvaluationThreeAnnotators()}. The value is
     * currently the placeholder {@code "TBD"} and has to be set to a real location before
     * running the evaluation.
     */
    public static final String goldDataPath = "TBD";

    /**
     * Computes upper bounds by computing segment boundaries on three annotators: each
     * annotator's folder is evaluated against the gold data and the mean and standard
     * deviation of the boundary similarity are printed, one line per annotator.
     *
     * @throws Exception if evaluating any of the annotator folders fails
     */
    public static void segmentEvaluationThreeAnnotators()
            throws Exception
    {
        File parentFolder = new File(
                "/usr/local/data/argumentation/all-annotations-phase2-exported/new-typesystem");

        for (String folderName : Arrays.asList("phase2-final-annotator1",
                "phase2-final-annotator2", "phase2-final-annotator3")) {
            File annotatorFolder = new File(parentFolder, folderName);

            SegEvalEvaluator evaluator = new SegEvalEvaluator();
            DescriptiveStatistics statistics = evaluator.evaluateFolders(new File(goldDataPath),
                    annotatorFolder, ArgumentComponent.class);

            // terminate each result with a line break so the three annotators' results do
            // not run together on a single line
            System.out.printf("%.3f +- %.3f%n", statistics.getMean(),
                    statistics.getStandardDeviation());
        }
    }

    /**
     * Runs the three-annotator segment evaluation.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the evaluation fails
     */
    public static void main(String[] args)
            throws Exception
    {
        segmentEvaluationThreeAnnotators();
        //        evaluatePredictions();
        //        evaluatePredictionsFolders(new File("/usr/local/data/argumentation/metacentrum-results-all-runs-exported-annotated/"));
    }
}