// Java tutorial
/* * This file is part of ALOE. * * ALOE is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * ALOE is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with ALOE. If not, see <http://www.gnu.org/licenses/>. * * Copyright (c) 2012 SCCL, University of Washington (http://depts.washington.edu/sccl) */ package etc.aloe.data; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.instance.RemoveWithValues; /** * ExampleSet contains information about data points that have features * extracted. These data points are ready for labeling by a model. * * Instances in an ExampleSet always have at least these attributes: 'message' - * which contains the message text. '*id' - which is a unique integer * identifying the message. 'label' - the ground truth label for the instance (0 * or 1) * * @author Michael Brooks <mjbrooks@uw.edu> */ public class ExampleSet { public final static String ID_ATTR_NAME = "*id"; public final static String MESSAGE_ATTR_NAME = "message"; public final static String LABEL_ATTR_NAME = "label"; public final static String PARTICIPANT_ATTR_NAME = "participant"; private Instances instances; /** * Construct an ExampleSet containing the given instances. * * @param instances */ public ExampleSet(Instances instances) { this.instances = instances; } /** * Make a copy of the ExampleSet, copying the underlying instances. * * @return */ public ExampleSet copy() { return new ExampleSet(new Instances(instances)); } /** * The size of the example set. 
* * @return */ public int size() { return instances.size(); } /** * Returns a new example set containing only those examples with labels. * * @return */ public ExampleSet onlyLabeled() { RemoveWithValues filter = new RemoveWithValues(); filter.setAttributeIndex("" + (instances.classIndex() + 1)); filter.setMatchMissingValues(true); filter.setInvertSelection(true); try { filter.setInputFormat(instances); Instances result = Filter.useFilter(instances, filter); ExampleSet resultSet = new ExampleSet(result); return resultSet; } catch (Exception ex) { System.err.println("Unable to apply filter!"); return null; } } /** * Get the ith instance. * * @param i * @return */ public Instance get(int i) { return instances.get(i); } /** * Get the underlying instances. * * @return */ public Instances getInstances() { return instances; } /** * Gets the actual label of the given example. If the example is unlabeled, * returns null; * * @param i * @return */ public Boolean getTrueLabel(int i) { Instance instance = instances.get(i); return getClassLabel(instance.classValue()); } /** * Converts a double class value into a boolean given the string labels for * the class attribute in this data set. Returns null if the class value is * weka missing. * * @param classValue * @return */ public Boolean getClassLabel(double classValue) { if (Double.isNaN(classValue)) { return null; } Attribute classAttr = instances.classAttribute(); String classValueStr = classAttr.value((int) classValue); return Boolean.parseBoolean(classValueStr); } /** * Gets the confidence in the positive class. * @param classDistribution * @param classValue * @return */ public Double getConfidence(double[] classDistribution, double classValue) { if (Double.isNaN(classValue)) { return null; } return classDistribution[(int) classValue]; } /** * Set the underlying instances. * * @param instances */ public void setInstances(Instances instances) { this.instances = instances; } }