etc.aloe.data.ExampleSet.java Source code

Java tutorial

Introduction

Here is the source code for etc.aloe.data.ExampleSet.java

Source

/*
 * This file is part of ALOE.
 *
 * ALOE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
    
 * ALOE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
    
 * You should have received a copy of the GNU General Public License
 * along with ALOE.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (c) 2012 SCCL, University of Washington (http://depts.washington.edu/sccl)
 */
package etc.aloe.data;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.RemoveWithValues;

/**
 * ExampleSet contains information about data points that have features
 * extracted. These data points are ready for labeling by a model.
 *
 * Instances in an ExampleSet always have at least these attributes: 'message' -
 * which contains the message text. '*id' - which is a unique integer
 * identifying the message. 'label' - the ground truth label for the instance (0
 * or 1)
 *
 * @author Michael Brooks <mjbrooks@uw.edu>
 */
public class ExampleSet {

    /** Name of the attribute holding the unique integer id of a message. */
    public static final String ID_ATTR_NAME = "*id";
    /** Name of the attribute holding the message text. */
    public static final String MESSAGE_ATTR_NAME = "message";
    /** Name of the attribute holding the ground truth label. */
    public static final String LABEL_ATTR_NAME = "label";
    /** Name of the participant attribute. */
    public static final String PARTICIPANT_ATTR_NAME = "participant";

    /** The wrapped Weka data set; replaceable through {@link #setInstances}. */
    private Instances instances;

    /**
     * Construct an ExampleSet containing the given instances. The instances
     * object is stored by reference, not copied.
     *
     * @param instances the Weka instances to wrap
     */
    public ExampleSet(Instances instances) {
        this.instances = instances;
    }

    /**
     * Make a copy of the ExampleSet, backed by a new Instances object created
     * with the Weka copy constructor.
     *
     * @return a new ExampleSet wrapping the copied instances
     */
    public ExampleSet copy() {
        return new ExampleSet(new Instances(instances));
    }

    /**
     * The size of the example set.
     *
     * @return the number of instances in the underlying data set
     */
    public int size() {
        return instances.size();
    }

    /**
     * Returns a new example set containing only those examples with labels,
     * i.e. those whose class attribute is not missing.
     *
     * @return the filtered example set, or null if the filter could not be
     * applied (the failure is reported on standard error)
     */
    public ExampleSet onlyLabeled() {
        RemoveWithValues filter = new RemoveWithValues();
        // classIndex() is 0-based; one is added before handing it to the filter.
        filter.setAttributeIndex(String.valueOf(instances.classIndex() + 1));
        filter.setMatchMissingValues(true);
        filter.setInvertSelection(true);

        try {
            filter.setInputFormat(instances);
            Instances result = Filter.useFilter(instances, filter);
            return new ExampleSet(result);
        } catch (Exception ex) {
            // Keep the historical best-effort contract (null on failure), but
            // report the underlying cause instead of discarding it.
            System.err.println("Unable to apply filter!");
            System.err.println("\t" + ex);
            return null;
        }
    }

    /**
     * Get the ith instance.
     *
     * @param i the index of the instance in the underlying data set
     * @return the instance at index i
     */
    public Instance get(int i) {
        return instances.get(i);
    }

    /**
     * Get the underlying instances. The returned object is the one held by
     * this example set, not a copy.
     *
     * @return the underlying instances
     */
    public Instances getInstances() {
        return instances;
    }

    /**
     * Gets the actual label of the given example. If the example is unlabeled,
     * returns null.
     *
     * @param i the index of the example
     * @return the label converted by {@link #getClassLabel(double)}, or null
     * if the example's class value is missing
     */
    public Boolean getTrueLabel(int i) {
        Instance instance = instances.get(i);
        return getClassLabel(instance.classValue());
    }

    /**
     * Converts a double class value into a boolean given the string labels for
     * the class attribute in this data set. Returns null if the class value is
     * weka missing.
     *
     * The class value is truncated to an int and used as an index into the
     * class attribute's values. The resulting string is parsed with
     * {@link Boolean#parseBoolean(String)}, so only the value "true" (ignoring
     * case) yields TRUE; every other value yields FALSE.
     *
     * @param classValue the class value, or NaN for a missing value
     * @return the boolean label, or null if classValue is NaN
     */
    public Boolean getClassLabel(double classValue) {
        if (Double.isNaN(classValue)) {
            return null;
        }

        Attribute classAttr = instances.classAttribute();
        String classValueStr = classAttr.value((int) classValue);
        return Boolean.parseBoolean(classValueStr);
    }

    /**
     * Gets the confidence assigned to the given class, i.e. the entry of the
     * class distribution at the index given by the class value.
     *
     * @param classDistribution the per-class distribution, indexed by class
     * value
     * @param classValue the class value to look up, or NaN for a missing value
     * @return the distribution entry for classValue, or null if classValue is
     * NaN
     */
    public Double getConfidence(double[] classDistribution, double classValue) {
        if (Double.isNaN(classValue)) {
            return null;
        }

        return classDistribution[(int) classValue];
    }

    /**
     * Set the underlying instances. The instances object is stored by
     * reference, not copied.
     *
     * @param instances the Weka instances to wrap from now on
     */
    public void setInstances(Instances instances) {
        this.instances = instances;
    }
}