de.uniheidelberg.cl.swp.mlprocess.InstanceContainer.java Source code

Java tutorial

Introduction

Here is the source code for de.uniheidelberg.cl.swp.mlprocess.InstanceContainer.java

Source

/*
 * ELAC: Ensemble Learning for Anaphora- and Coreference-Resolution-Systems
 * package: de.uniheidelberg.cl.swp.mlprocess
 * class: InstanceContainer
 * 
 * Authors: E-Mail
 * Thomas Boegel: boegel@cl.uni-heidelberg.de
 * Lukas Funk: funk@cl.uni-heidelberg.de
 * Andreas Kull: kull@cl.uni-heidelberg.de
 * 
 * Please find a detailed explanation of this particular class/package and its role and usage at
 * the first JavaDoc following this comment.
 * 
 * Copyright 2010 Thomas Boegel & Lukas Funk & Andreas Kull
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.uniheidelberg.cl.swp.mlprocess;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import de.uniheidelberg.cl.swp.datastructure.CoreferencePair;
import de.uniheidelberg.cl.swp.datastructure.Feature;
import de.uniheidelberg.cl.swp.featureExtraction.AbstractFeatureExtractor;
import de.uniheidelberg.cl.swp.featureExtraction.features.FeatureType;
import de.uniheidelberg.cl.swp.testacr.BARTRunner;
import de.uniheidelberg.cl.swp.testacr.JavaRapRunner;
import de.uniheidelberg.cl.swp.testacr.LingpipeRunner;

/**
 * A container for WEKA Instances utilized by {@link de.uniheidelberg.cl.swp.mlprocess.MLProcess}.
 * <br>
 * This is a temporary container for providing the attribute structure which WEKA requests.
 * <br>
 * Attribute layout: one attribute per feature extractor, followed by the nominal ACR-system
 * attribute (second to last) and the nominal class attribute (last).
 * <br>
 * {@link #createAttributeStructure(List)} has to be called before any instance is added.
 */
public class InstanceContainer {
    /** Name of the nominal attribute naming the ACR-System which made the prediction. */
    private static final String ACR_SYSTEM_ATTRIBUTE = "acrSystem";

    /**
     * Name of the class attribute. The spelling is part of the data emitted at runtime and is
     * therefore left untouched.
     */
    private static final String CLASS_ATTRIBUTE = "corretOutputBySystem";

    /** Class label used for test instances whose outcome is not known. */
    private static final String UNKNOWN_RESULT = "?";

    /** Marker returned by {@link #getAttributeByName(String)} if no attribute matches. */
    private static final int ATTRIBUTE_NOT_FOUND = -1;

    private Instances instances;

    /**
     * Takes a list of {@link AbstractFeatureExtractor} and adds the corresponding feature as a
     * WEKA Attribute to the Instances structure.
     * <br>
     * The structure is only created once; subsequent calls leave the existing structure (and
     * all instances stored so far) untouched.
     *  
     * @param featureList The list of FeatureExtractors to be added.
     */
    public void createAttributeStructure(List<AbstractFeatureExtractor> featureList) {
        if (this.instances != null) {
            return;
        }
        /* two additional slots: ACR-System attribute and class attribute */
        ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>(featureList.size() + 2);

        for (AbstractFeatureExtractor fe : featureList) {
            wekaAttributes.add(fe.getWekaAttribute());
        }

        /* Add the ACR-Systems as feature */
        List<String> acrFeature = new ArrayList<String>(3);

        acrFeature.add(JavaRapRunner.class.getCanonicalName());
        acrFeature.add(LingpipeRunner.class.getCanonicalName());
        acrFeature.add(BARTRunner.class.getCanonicalName());

        /* Add the possible prediction values */
        List<String> correctFalsePrediction = new ArrayList<String>(3);

        correctFalsePrediction.add("+");
        correctFalsePrediction.add("-");
        correctFalsePrediction.add(UNKNOWN_RESULT);

        wekaAttributes.add(new Attribute(ACR_SYSTEM_ATTRIBUTE, acrFeature));
        wekaAttributes.add(new Attribute(CLASS_ATTRIBUTE, correctFalsePrediction));

        this.instances = new Instances("ACResolution", wekaAttributes, 0);
        /* the class attribute is always the last one */
        this.instances.setClassIndex(this.instances.numAttributes() - 1);
    }

    /**
     * Takes the name of a feature and returns the position of the corresponding WEKA Attribute
     * object for the feature.
     * <br>
     * This has to be done to maintain the correlation between feature and attribute.
     * 
     * @param name Name of the feature.
     * @return Position of the corresponding attribute or {@link #ATTRIBUTE_NOT_FOUND} if there is
     *         no attribute with this name.
     */
    private int getAttributeByName(String name) {
        for (int i = 0; i < this.instances.numAttributes(); i++) {
            if (this.instances.attribute(i).name().equals(name)) {
                return i;
            }
        }
        /* must not be a valid index, otherwise unknown features would overwrite attribute 0 */
        return ATTRIBUTE_NOT_FOUND;
    }

    /**
     * Translates a nominal label into the internal value WEKA stores for it.
     * 
     * @param attributeIndex Position of the attribute the label belongs to.
     * @param label Label to be translated.
     * @return Index of the label within the attribute, or NaN (the value WEKA uses to code a
     *         missing value) if the attribute does not define the label.
     */
    private double nominalValue(int attributeIndex, String label) {
        int valueIndex = this.instances.attribute(attributeIndex).indexOfValue(label);

        /* indexOfValue yields -1 for unknown labels, which is not a valid value index */
        return valueIndex < 0 ? Double.NaN : valueIndex;
    }

    /**
     * Builds the value vector for a {@link CoreferencePair}, wraps it into an Instance and stores
     * it in the Instances. Shared implementation of both addCorefInstance variants.
     * 
     * @param corefPair {@link CoreferencePair} providing the feature values.
     * @param acr ACR-System.
     * @param result Label of the class attribute.
     * @return Instance object which has been added.
     * @throws IllegalStateException if the attribute structure has not been created yet.
     */
    private Instance buildAndStoreInstance(CoreferencePair corefPair, String acr, String result) {
        if (this.instances == null) {
            throw new IllegalStateException(
                    "createAttributeStructure has to be called before instances are added");
        }
        double[] vals = new double[this.instances.numAttributes()];

        for (Feature<?> fe : corefPair.getFeatuerVector()) {
            int currentCorefAttribute = getAttributeByName(fe.getName());

            /* features without a corresponding attribute are not part of the structure */
            if (currentCorefAttribute == ATTRIBUTE_NOT_FOUND) {
                continue;
            }

            /* if the current feature is numeric, parse the value as double */
            if (fe.getFtype() == FeatureType.NUMERIC) {
                vals[currentCorefAttribute] = Double.parseDouble(fe.getStringValue());
            } else {
                vals[currentCorefAttribute] = nominalValue(currentCorefAttribute,
                        fe.getStringValue());
            }
        }
        /* (vals.length-2) contains the acrRunner which made the prediction */
        vals[vals.length - 2] = nominalValue(vals.length - 2, acr);

        /* (vals.length-1) describes whether the system made the correct prediction */
        vals[vals.length - 1] = nominalValue(vals.length - 1, result);

        Instance inst = new DenseInstance(1.0, vals);

        this.instances.add(inst);
        /*
         * Instances.add stores a copy of the instance; give the returned object access to the
         * attribute information as well.
         */
        inst.setDataset(this.instances);
        return inst;
    }

    /**
     * Takes a single {@link CoreferencePair} and adds it to the Instances.
     * <br>
     * This method is used in the training process and + or - will be added.
     * <br>
     * This is done by matching all the Attributes to the corresponding features of the
     * {@link CoreferencePair}. Features without a matching attribute are ignored; nominal values
     * which are unknown to their attribute are stored as missing values.
     * 
     * @param corefPair {@link CoreferencePair} to be added.
     * @param acr ACR-System.
     * @param result Result of the ACR-System for the {@link CoreferencePair}-candidate.
     * @return Instance object which has been added.
     * @throws IllegalStateException if the attribute structure has not been created yet.
     */
    public Instance addCorefInstance(CoreferencePair corefPair, String acr, String result) {
        return buildAndStoreInstance(corefPair, acr, result);
    }

    /**
     * Takes a single {@link CoreferencePair} and adds it to the Instances.
     * <br>
     * This method is used in the test process and "?" will be added automatically.
     * <br>
     * This is done by matching all the Attributes to the corresponding features of the
     * {@link CoreferencePair}. Features without a matching attribute are ignored; nominal values
     * which are unknown to their attribute are stored as missing values.
     * 
     * @param corefPair {@link CoreferencePair} to be added.
     * @param acr ACR-System.
     * @return Instance object which has been added.
     * @throws IllegalStateException if the attribute structure has not been created yet.
     */
    public Instance addCorefInstance(CoreferencePair corefPair, String acr) {
        return buildAndStoreInstance(corefPair, acr, UNKNOWN_RESULT);
    }

    /**
     * Adds a List of {@link CoreferencePair}s to the list of Instances.
     * 
     * @param corefPairs {@link CoreferencePair}s to be added. 
     * @param acr Related ACR-System.
     * @param result Result of the related ACR-System.
     * @return Mapping between {@link CoreferencePair}s and their corresponding Instance objects.
     * @throws IllegalStateException if the attribute structure has not been created yet and
     *         the list is not empty.
     */
    public Map<CoreferencePair, Instance> addCorefInstances(List<CoreferencePair> corefPairs, String acr,
            String result) {
        Map<CoreferencePair, Instance> crpInstAlignment = new HashMap<CoreferencePair, Instance>();

        for (CoreferencePair corefPair : corefPairs) {
            crpInstAlignment.put(corefPair, this.addCorefInstance(corefPair, acr, result));
        }
        return crpInstAlignment;
    }

    /**
     * Getter for the Instances object which contains the stored Instance objects.
     * 
     * @return The Instance objects stored in the Instances Container, or <code>null</code> if the
     *         attribute structure has not been created yet.
     */
    public Instances getInstances() {
        return instances;
    }
}