List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a * single data point./*from w w w . j ava 2 s. co m*/ * * @param instances the weka Instances object containing attributes * @param data_point the data point to convert * @return a weka instance with assigned attributes */ protected static Instance assignWekaAttributes(Instances instances, Word data_point) { double[] instance = new double[instances.numAttributes()]; for (int i = 0; i < instances.numAttributes(); ++i) { Attribute attribute = instances.attribute(i); if (data_point.hasAttribute(attribute.name()) && !data_point.getAttribute(attribute.name()).toString().equals("?")) { switch (attribute.type()) { case Attribute.NOMINAL: int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString()); instance[i] = (double) index; break; case Attribute.NUMERIC: // Check if value is really a number. try { instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString()); } catch (NumberFormatException e) { AuToBIUtils.error("Number expected for feature: " + attribute.name()); } break; case Attribute.STRING: instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString()); break; default: AuToBIUtils.error("Unknown attribute type"); } } else { instance[i] = Utils.missingValue(); } } Instance inst = new DenseInstance(1, instance); inst.setDataset(instances); return inst; }
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/**
 * Selects the class attribute of a weka Instances object by name.
 * <p/>
 * Attributes are scanned in index order and the first one whose name equals {@code class_attribute} becomes the
 * class attribute. When {@code class_attribute} is null, or no attribute carries that name, the last attribute is
 * used instead.
 *
 * @param instances       the instances object
 * @param class_attribute the desired class attribute.
 */
static void setWekaClassAttribute(Instances instances, String class_attribute) {
    // Fallback choice: the last attribute.
    int class_index = instances.numAttributes() - 1;

    if (class_attribute != null) {
        for (int candidate = 0; candidate < instances.numAttributes(); ++candidate) {
            if (class_attribute.equals(instances.attribute(candidate).name())) {
                class_index = candidate;
                break;
            }
        }
    }

    instances.setClassIndex(class_index);
}
From source file:edu.insight.finlaw.multilabel.rough.CreateInstances.java
License:Open Source License
/** * Generates the Instances object and outputs it in ARFF format to stdout. * * @param args ignored// ww w .j a va2 s . c om * @throws Exception if generation of instances fails */ public static void main(String[] args) throws Exception { ArrayList<Attribute> atts; ArrayList<Attribute> attsRel; ArrayList<String> attVals; ArrayList<String> attValsRel; Instances data; Instances dataRel; double[] vals; double[] valsRel; int i; // 1. set up attributes atts = new ArrayList<Attribute>(); // - numeric atts.add(new Attribute("att1")); // - nominal attVals = new ArrayList<String>(); for (i = 0; i < 5; i++) attVals.add("val" + (i + 1)); atts.add(new Attribute("att2", attVals)); // - string atts.add(new Attribute("att3", (ArrayList<String>) null)); // - date atts.add(new Attribute("att4", "yyyy-MM-dd")); // - relational attsRel = new ArrayList<Attribute>(); // -- numeric attsRel.add(new Attribute("att5.1")); // -- nominal attValsRel = new ArrayList<String>(); for (i = 0; i < 5; i++) attValsRel.add("val5." + (i + 1)); attsRel.add(new Attribute("att5.2", attValsRel)); dataRel = new Instances("att5", attsRel, 0); atts.add(new Attribute("att5", dataRel, 0)); // 2. create Instances object data = new Instances("MyRelation", atts, 0); // 3. 
fill with data // first instance vals = new double[data.numAttributes()]; // - numeric vals[0] = Math.PI; // - nominal vals[1] = attVals.indexOf("val3"); // - string vals[2] = data.attribute(2).addStringValue("This is a string!"); // - date vals[3] = data.attribute(3).parseDate("2001-11-09"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.PI + 1; valsRel[1] = attValsRel.indexOf("val5.3"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.PI + 2; valsRel[1] = attValsRel.indexOf("val5.2"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // second instance vals = new double[data.numAttributes()]; // important: needs NEW array! // - numeric vals[0] = Math.E; // - nominal vals[1] = attVals.indexOf("val1"); // - string vals[2] = data.attribute(2).addStringValue("And another one!"); // - date vals[3] = data.attribute(3).parseDate("2000-12-01"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.E + 1; valsRel[1] = attValsRel.indexOf("val5.4"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.E + 2; valsRel[1] = attValsRel.indexOf("val5.1"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // 4. output data System.out.println(data); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) { final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>(); for (int i = 0; i < transform.outDimension(); ++i) { out_attributes.add(new Attribute("x" + i)); }// ww w . j av a 2s .co m out_attributes.add((Attribute) src.classAttribute().copy()); final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0); for (int i = 0; i < src.size(); ++i) { final Instance inst = src.get(i); final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst)); final RealVector transformed_vector = transform.encode(flat).x; final double[] transformed = new double[transformed_vector.getDimension() + 1]; for (int j = 0; j < transformed_vector.getDimension(); ++j) { transformed[j] = transformed_vector.getEntry(j); } transformed[transformed.length - 1] = inst.classValue(); final Instance transformed_instance = new DenseInstance(inst.weight(), transformed); out.add(transformed_instance); transformed_instance.setDataset(out); } out.setClassIndex(out.numAttributes() - 1); return out; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
/** * Constructs an InformationTheoreticMetricLearner from a set of labeled * state vector differences.//from w w w . ja va 2 s .c om * * @param config * @param A0 * @param XL A labeled set of state vector differences. The label must be * the last attribute, and it must be 1 if the states are similar and 0 * if they are not. * @return */ private static <A> InformationTheoreticMetricLearner learnMetric(final Configuration config, final RealMatrix A0, final Instances XL) { final int d = XL.numAttributes() - 1; //XL.get( 0 ).getDimension(); System.out.println("d = " + d); final double u; final double ell; final double gamma = config.getDouble("itml.gamma"); // We will set 'ell' and 'u' using sample quantiles as described in // the ITML paper. final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95); final ArrayList<double[]> S = new ArrayList<double[]>(); final ArrayList<double[]> D = new ArrayList<double[]>(); for (int i = 0; i < XL.size(); ++i) { final Instance ii = XL.get(i); final double diff[] = new double[d]; for (int j = 0; j < d; ++j) { diff[j] = ii.value(j); } if (ii.classValue() == 0.0) { D.add(diff); } else { S.add(diff); } qacc.add(Math.sqrt(HilbertSpace.inner_prod(diff, A0, diff))); } // Set bounds to quantile esimates ell = qacc.estimates[0]; u = qacc.estimates[1]; System.out.println("ITML: ell = " + ell); System.out.println("ITML: u = " + u); final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(S, D, u, ell, A0, gamma, config.rng); itml.run(); return itml; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
private static <A> MetricConstrainedKMeans makeClustering(final Configuration config, final RealMatrix A0, final Instances instances, final ArrayList<RealVector> XU, final boolean with_metric_learning) { final int K = config.getInt("cluster.k"); final int d = instances.numAttributes(); //XL.get( 0 ).getDimension(); final double u; final double ell; final double gamma = config.getDouble("itml.gamma"); // We will set 'ell' and 'u' using sample quantiles as described in // the ITML paper. final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95); final ArrayList<RealVector> X = new ArrayList<RealVector>(); X.addAll(XL);/* ww w. j a va 2 s.com*/ X.addAll(XU); // Must-link and Cannot-link constraints in the form that // MetricConstrainedKMeans wants them final TIntObjectMap<Pair<int[], double[]>> M = new TIntObjectHashMap<Pair<int[], double[]>>(); final TIntObjectMap<Pair<int[], double[]>> C = new TIntObjectHashMap<Pair<int[], double[]>>(); for (int i = 0; i < XL.size(); ++i) { final TIntList m = new TIntArrayList(); final TIntList c = new TIntArrayList(); for (int j = i + 1; j < XL.size(); ++j) { if (y.get(i).equals(y.get(j))) { m.add(j); } else { c.add(j); } qacc.add(Math.sqrt(HilbertSpace.inner_prod(XL.get(i), A0, XL.get(j)))); } M.put(i, Pair.makePair(m.toArray(), Fn.repeat(1.0, m.size()))); C.put(i, Pair.makePair(c.toArray(), Fn.repeat(1.0, c.size()))); } // Set bounds to quantile esimates ell = qacc.estimates[0]; u = qacc.estimates[1]; System.out.println("ITML: ell = " + ell); System.out.println("ITML: u = " + u); // Similar pairs for MetricLearner final ArrayList<int[]> S = new ArrayList<int[]>(); M.forEachKey(new TIntProcedure() { @Override public boolean execute(final int i) { final Pair<int[], double[]> p = M.get(i); if (p != null) { for (final int j : p.first) { S.add(new int[] { i, j }); } } return true; } }); // Disimilar pairs for MetricLearner final ArrayList<int[]> D = new ArrayList<int[]>(); C.forEachKey(new TIntProcedure() { @Override public 
boolean execute(final int i) { final Pair<int[], double[]> p = C.get(i); if (p != null) { for (final int j : p.first) { D.add(new int[] { i, j }); } } return true; } }); final RealMatrix A; if (with_metric_learning) { final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(X, S, D, u, ell, A0, gamma, config.rng); itml.run(); A = itml.A(); } else { A = A0; } final MetricConstrainedKMeans kmeans = new MetricConstrainedKMeans(K, d, X, A, M, C, config.rng); kmeans.run(); return kmeans; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
/** * Creates a labeled dataset of states pair with optimal actions. Action * labels are represented as indexes into an array list. Mappings in both * directions are also returned./*from w w w . ja v a 2s. c o m*/ * @param config * @param attributes * @param data * @param labels * @param iter * @return */ private static <A extends VirtualConstructor<A>> SingleInstanceDataset<A> makeSingleInstanceDataset( final Configuration config, final ArrayList<Attribute> attributes, final ArrayList<double[]> data, final ArrayList<A> labels, final ArrayList<Pair<ArrayList<A>, TDoubleList>> qtable, final int iter) { // System.out.println( "data.size() = " + data.size() ); final int[] ii = Fn.range(0, data.size()); Fn.shuffle(config.rng, ii); final HashMap<A, Integer> action_to_int = new HashMap<A, Integer>(); final ArrayList<A> int_to_action = new ArrayList<A>(); final ArrayList<Pair<ArrayList<A>, TDoubleList>> abridged_qtable = (qtable != null ? new ArrayList<Pair<ArrayList<A>, TDoubleList>>() : null); final TIntArrayList counts = new TIntArrayList(); final int max_per_label = config.getInt("training.max_per_label"); final int max_instances = config.getInt("training.max_single"); final ArrayList<DenseInstance> instance_list = new ArrayList<DenseInstance>(); for (int i = 0; i < Math.min(data.size(), max_instances); ++i) { final int idx = ii[i]; final A a = labels.get(idx); final Integer idx_obj = action_to_int.get(a); final int label; if (idx_obj == null) { // System.out.println( "\tNew action: " + a ); label = int_to_action.size(); int_to_action.add(a); action_to_int.put(a, label); counts.add(0); } else { // System.out.println( "\tRepeat action: " + a ); label = idx_obj; } final int c = counts.get(label); if (max_per_label <= 0 || c < max_per_label) { // System.out.println( "Adding " + label ); final double[] phi = Fn.append(data.get(idx), label); final DenseInstance instance = new DenseInstance(1.0, phi); instance_list.add(instance); counts.set(label, c + 1); if (qtable != 
null) { abridged_qtable.add(qtable.get(idx)); } } } final int Nlabels = int_to_action.size(); final ArrayList<Attribute> labeled_attributes = addLabelToAttributes(attributes, Nlabels); final Instances instances = new Instances(deriveDatasetName(config.training_data_single, iter), labeled_attributes, counts.sum()); instances.setClassIndex(instances.numAttributes() - 1); for (final DenseInstance instance : instance_list) { instances.add(instance); instance.setDataset(instances); } return new SingleInstanceDataset<A>(instances, action_to_int, int_to_action, abridged_qtable); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> Instances makePairDataset( final RandomGenerator rng, final int max_pairwise_instances, final Instances single, final InstanceCombiner combiner) { // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Instance> negative = new ReservoirSampleAccumulator<Instance>(rng, max_pairwise_instances);/*from w w w. java2 s . c o m*/ final ReservoirSampleAccumulator<Instance> positive = new ReservoirSampleAccumulator<Instance>(rng, max_pairwise_instances); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(pair_instance); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(pair_instance); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + max_pairwise_instances; final Instances x = new Instances(dataset_name, combiner.attributes(), 2 * N); x.setClassIndex(x.numAttributes() - 1); for (final Instance ineg : negative.samples()) { x.add(ineg); } for (final Instance ipos : positive.samples()) { x.add(ipos); } return x; // return new PairDataset( x, combiner ); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/** * Constructs one positive pair and one negative pair involving each * data point in 'single'.//from w ww . ja v a 2 s . c om * @param rng * @param max_pairwise_instances * @param single * @param combiner * @return */ public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makeBalancedPairDataset( final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance, final Instances single, final InstanceCombiner combiner) { final int Nnegative = negative_per_instance * single.size(); final int Npositive = positive_per_instance * single.size(); // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Nnegative); final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Npositive); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + Nnegative + "x" + Npositive; final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive); x.setClassIndex(x.numAttributes() - 1); final ArrayList<int[]> matches = new ArrayList<int[]>(); for (final 
Pair<Instance, int[]> ineg : negative.samples()) { WekaUtil.addInstance(x, ineg.first); matches.add(ineg.second); } for (final Pair<Instance, int[]> ipos : positive.samples()) { WekaUtil.addInstance(x, ipos.first); matches.add(ipos.second); } return new PairDataset(x, matches, combiner); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/** * Constructs one positive pair and one negative pair involving each * data point in 'single'.// w w w. ja va 2 s . c o m * @param rng * @param max_pairwise_instances * @param single * @param combiner * @return */ public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makePlausiblePairDataset( final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance, final Instances single, final InstanceCombiner combiner, final Fn.Function2<Boolean, Instance, Instance> plausible_p) { final int Nnegative = negative_per_instance * single.size(); final int Npositive = positive_per_instance * single.size(); // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Nnegative); final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Npositive); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); if (!plausible_p.apply(ii, ij)) { // System.out.println( "Not plausible: " + ii + " != " + ij ); continue; } // System.out.println( "! 
Plausible: " + ii + " == " + ij ); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + negative.samples().size() + "x" + positive.samples().size(); final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive); x.setClassIndex(x.numAttributes() - 1); final ArrayList<int[]> matches = new ArrayList<int[]>(); for (final Pair<Instance, int[]> ineg : negative.samples()) { WekaUtil.addInstance(x, ineg.first); matches.add(ineg.second); } for (final Pair<Instance, int[]> ipos : positive.samples()) { WekaUtil.addInstance(x, ipos.first); matches.add(ipos.second); } return new PairDataset(x, matches, combiner); }