List of usage examples for weka.core Instance classValue
public double classValue();
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * //from w w w .ja v a2s.c om * @param data */ public void buildTree(Instances data) { // exit if there is no data left in the dataset if (data.numInstances() == 0) { mChild = null; return; } double[] informationGains = new double[data.numAttributes()]; Enumeration enumAttrs = data.enumerateAttributes(); while (enumAttrs.hasMoreElements()) { Attribute attr = (Attribute) enumAttrs.nextElement(); informationGains[attr.index()] = computeGain(data, attr); } int maxIdx = Utils.maxIndex(informationGains); if (Utils.eq(informationGains[maxIdx], 0)) { mClassDistribution = new int[data.numClasses()]; Enumeration enumInst = data.enumerateInstances(); while (enumInst.hasMoreElements()) { Instance instance = (Instance) enumInst.nextElement(); mClassDistribution[(int) instance.classValue()]++; } mClassValue = Utils.maxIndex(mClassDistribution); } else { mSplitAttribute = data.attribute(maxIdx); Instances[] splitInstances = splitInstancesOnAttribute(data, mSplitAttribute); mChild = new VerandaTree[mSplitAttribute.numValues()]; for (int i = 0; i < mChild.length; i++) { mChild[i] = new VerandaTree(); mChild[i].buildTree(splitInstances[i]); } } }
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * /* w w w. jav a2 s . com*/ * @param data * @return */ public double computeEntropy(Instances data) { double[] nClass = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); nClass[(int) instance.classValue()]++; } double entropy = 0.0; for (int i = 0; i < data.numClasses(); i++) { if (nClass[i] > 0) { double ratio = nClass[i] / data.numInstances(); entropy -= (ratio * Utils.log2(ratio)); } } return entropy; }
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully/*from w w w. j av a 2 s . c o m*/ */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i])); } } }
From source file:edu.drexel.psal.jstylo.verifiers.WLSVM.java
License:Open Source License
/** * Converts an ARFF Instance into a string in the sparse format accepted by * LIBSVM// ww w . j a va 2 s. co m * * @param instance * @return */ protected String InstanceToSparse(Instance instance) { String line = new String(); int c = (int) instance.classValue(); if (c == 0) c = -1; line = c + " "; for (int j = 1; j < instance.numAttributes(); j++) { if (j - 1 == instance.classIndex()) { continue; } if (instance.isMissing(j - 1)) continue; if (instance.value(j - 1) != 0) line += " " + j + ":" + instance.value(j - 1); } // LOG.info(line); return (line + "\n"); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static ClusterContingencyTable evaluateClassifier(final Classifier classifier, final Instances test) { try {// ww w . jav a 2 s . co m final Map<Integer, Set<RealVector>> Umap = new TreeMap<Integer, Set<RealVector>>(); final Map<Integer, Set<RealVector>> Vmap = new TreeMap<Integer, Set<RealVector>>(); final Remove rm_filter = new Remove(); rm_filter.setAttributeIndicesArray(new int[] { test.classIndex() }); rm_filter.setInputFormat(test); for (final Instance i : test) { rm_filter.input(i); final double[] phi = rm_filter.output().toDoubleArray(); // final double[] phi = WekaUtil.unlabeledFeatures( i ); final int cluster = (int) classifier.classifyInstance(i); Set<RealVector> u = Umap.get(cluster); if (u == null) { u = new HashSet<RealVector>(); Umap.put(cluster, u); } u.add(new ArrayRealVector(phi)); final int true_label = (int) i.classValue(); Set<RealVector> v = Vmap.get(true_label); if (v == null) { v = new HashSet<RealVector>(); Vmap.put(true_label, v); } v.add(new ArrayRealVector(phi)); } final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>(); for (final Map.Entry<Integer, Set<RealVector>> e : Umap.entrySet()) { U.add(e.getValue()); } final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>(); for (final Map.Entry<Integer, Set<RealVector>> e : Vmap.entrySet()) { V.add(e.getValue()); } return new ClusterContingencyTable(U, V); } catch (final RuntimeException ex) { throw ex; } catch (final Exception ex) { throw new RuntimeException(ex); } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static ClusterContingencyTable evaluateClustering(final MetricSimilarityFunction f, final Instances single, final int max_branching) { final StreamingClusterer clusterer = new StreamingClusterer(f, max_branching); final Map<Integer, Set<RealVector>> Umap = new TreeMap<Integer, Set<RealVector>>(); final Map<Integer, Set<RealVector>> Vmap = new TreeMap<Integer, Set<RealVector>>(); for (final Instance i : single) { final double[] phi = WekaUtil.unlabeledFeatures(i); final int cluster = clusterer.clusterState(phi); Set<RealVector> u = Umap.get(cluster); if (u == null) { u = new HashSet<RealVector>(); Umap.put(cluster, u);/*from www . ja v a 2s.com*/ } u.add(new ArrayRealVector(phi)); final int true_label = (int) i.classValue(); Set<RealVector> v = Vmap.get(true_label); if (v == null) { v = new HashSet<RealVector>(); Vmap.put(true_label, v); } v.add(new ArrayRealVector(phi)); } final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>(); for (final Map.Entry<Integer, Set<RealVector>> e : Umap.entrySet()) { U.add(e.getValue()); } final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>(); for (final Map.Entry<Integer, Set<RealVector>> e : Vmap.entrySet()) { V.add(e.getValue()); } return new ClusterContingencyTable(U, V); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) { final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>(); for (int i = 0; i < transform.outDimension(); ++i) { out_attributes.add(new Attribute("x" + i)); }//from w w w .j ava2 s.c om out_attributes.add((Attribute) src.classAttribute().copy()); final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0); for (int i = 0; i < src.size(); ++i) { final Instance inst = src.get(i); final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst)); final RealVector transformed_vector = transform.encode(flat).x; final double[] transformed = new double[transformed_vector.getDimension() + 1]; for (int j = 0; j < transformed_vector.getDimension(); ++j) { transformed[j] = transformed_vector.getEntry(j); } transformed[transformed.length - 1] = inst.classValue(); final Instance transformed_instance = new DenseInstance(inst.weight(), transformed); out.add(transformed_instance); transformed_instance.setDataset(out); } out.setClassIndex(out.numAttributes() - 1); return out; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/** * @param args/*from ww w . j ava 2 s . co m*/ * @throws IOException * @throws FileNotFoundException */ public static void main(final String[] args) throws FileNotFoundException, IOException { final String experiment_file = args[0]; final File root_directory; if (args.length > 1) { root_directory = new File(args[1]); } else { root_directory = new File("."); } final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file)); final String experiment_name = FilenameUtils.getBaseName(experiment_file); final File expr_directory = new File(root_directory, experiment_name); expr_directory.mkdirs(); final Csv.Writer csv = new Csv.Writer( new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv")))); final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma", "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier", "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth", "pairwise_classifier.max_branching", "training.label_noise" }; csv.cell("domain").cell("abstraction"); for (final String p : parameter_headers) { csv.cell(p); } csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline(); for (int expr = 0; expr < csv_config.size(); ++expr) { try { final KeyValueStore expr_config = csv_config.get(expr); final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(), expr_config); System.out.println("[Loading '" + config.training_data_single + "']"); final Instances single = WekaUtil .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff")); final Instances train = new Instances(single, 0); final int[] idx = Fn.range(0, single.size()); int instance_counter = 0; Fn.shuffle(config.rng, idx); final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename? final double label_noise = config.getDouble("training.label_noise"); final int Nlabels = train.classAttribute().numValues(); assert (Nlabels > 0); for (int i = 0; i < Ntrain; ++i) { final Instance inst = single.get(idx[instance_counter++]); if (label_noise > 0 && config.rng.nextDouble() < label_noise) { int noisy_label = 0; do { noisy_label = config.rng.nextInt(Nlabels); } while (noisy_label == (int) inst.classValue()); System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")"); inst.setClassValue(noisy_label); } train.add(inst); inst.setDataset(train); } final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config); final int Ntest = config.Ntest_games; int Ntest_added = 0; final ArrayList<Instances> tests = new ArrayList<Instances>(); while (instance_counter < single.size() && Ntest_added < Ntest) { final Instance inst = single.get(idx[instance_counter++]); boolean found = false; for (final Instances test : tests) { // Note that 'plausible_p' should be transitive if (plausible_p.apply(inst, test.get(0))) { WekaUtil.addInstance(test, inst); if (test.size() == 30) { Ntest_added += test.size(); } else if (test.size() > 30) { Ntest_added += 1; } found = true; break; } } if (!found) { final Instances test = new Instances(single, 0); WekaUtil.addInstance(test, inst); tests.add(test); } } final Iterator<Instances> test_itr = tests.iterator(); while (test_itr.hasNext()) { if (test_itr.next().size() < 30) { test_itr.remove(); } } System.out.println("=== tests.size() = " + tests.size()); System.out.println("=== Ntest_added = " + Ntest_added); System.out.println("[Training]"); final Evaluator evaluator = createEvaluator(config, train); // final Instances transformed_test = evaluator.prepareInstances( test ); System.out.println("[Evaluating]"); final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1; final MeanVarianceAccumulator ami = new MeanVarianceAccumulator(); final MeanVarianceAccumulator errors = new MeanVarianceAccumulator(); final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator(); int c = 0; for (int xval = 0; xval < Nxval; ++xval) { for (final Instances test : tests) { // TODO: Debugging WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test); // transformed_test.randomize( new RandomAdaptor( config.rng ) ); // final ClusterContingencyTable ct = evaluator.evaluate( transformed_test ); test.randomize(new RandomAdaptor(config.rng)); final ClusterContingencyTable ct = evaluator.evaluate(test); System.out.println(ct); int Nerrors = 0; final MeanVarianceAccumulator mv = new MeanVarianceAccumulator(); for (int i = 0; i < ct.R; ++i) { final int max = Fn.max(ct.n[i]); Nerrors += (ct.a[i] - max); mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]); } errors.add(Nerrors); relative_error.add(mv.mean()); System.out.println("exemplar: " + test.get(0)); System.out.println("Nerrors = " + Nerrors); final PrintStream ct_out = new PrintStream( new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv"))); ct.writeCsv(ct_out); ct_out.close(); final double ct_ami = ct.adjustedMutualInformation_max(); if (Double.isNaN(ct_ami)) { System.out.println("! ct_ami = NaN"); } else { ami.add(ct_ami); } System.out.println(); } } System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")"); System.out.println( "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")"); System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")"); csv.cell(config.domain).cell(config.get("abstraction.discovery")); for (final String p : parameter_headers) { csv.cell(config.get(p)); } csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline(); } catch (final Exception ex) { ex.printStackTrace(); } } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
/** * Constructs an InformationTheoreticMetricLearner from a set of labeled * state vector differences.//from w w w . jav a 2s . c om * * @param config * @param A0 * @param XL A labeled set of state vector differences. The label must be * the last attribute, and it must be 1 if the states are similar and 0 * if they are not. * @return */ private static <A> InformationTheoreticMetricLearner learnMetric(final Configuration config, final RealMatrix A0, final Instances XL) { final int d = XL.numAttributes() - 1; //XL.get( 0 ).getDimension(); System.out.println("d = " + d); final double u; final double ell; final double gamma = config.getDouble("itml.gamma"); // We will set 'ell' and 'u' using sample quantiles as described in // the ITML paper. final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95); final ArrayList<double[]> S = new ArrayList<double[]>(); final ArrayList<double[]> D = new ArrayList<double[]>(); for (int i = 0; i < XL.size(); ++i) { final Instance ii = XL.get(i); final double diff[] = new double[d]; for (int j = 0; j < d; ++j) { diff[j] = ii.value(j); } if (ii.classValue() == 0.0) { D.add(diff); } else { S.add(diff); } qacc.add(Math.sqrt(HilbertSpace.inner_prod(diff, A0, diff))); } // Set bounds to quantile esimates ell = qacc.estimates[0]; u = qacc.estimates[1]; System.out.println("ITML: ell = " + ell); System.out.println("ITML: u = " + u); final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(S, D, u, ell, A0, gamma, config.rng); itml.run(); return itml; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
private static <A> SingleInstanceDataset<A> combineInstances(final RandomGenerator rng, final SingleInstanceDataset<A> running, final SingleInstanceDataset<A> new_, final double discount) { // final int[] idx = Fn.range( 0, running.size() ); // Fn.shuffle( rng, idx ); ///*from w ww .ja v a2s. co m*/ // for( int i = 0; i < running.size(); ++i ) { // final double p = rng.nextDouble(); // if( p < discount ) { // running.set( i, new_.get( idx[i] ) ); // } // } // // return running; // TODO: This is the "right" thing to do, but the dataset might get // unmanageable quickly for (final Instance inst : new_.instances) { inst.setValue(inst.classIndex(), running.action_to_int.get(new_.int_to_action.get((int) inst.classValue()))); running.instances.add(inst); } return running; }