Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find example usage for weka.core Instances numAttributes.

Prototype


public int numAttributes()

Source Link

Document

Returns the number of attributes.

Usage

From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License:Open Source License

/**
 * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a
 * single data point./*from w w w .  j ava  2  s.  co  m*/
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    double[] instance = new double[instances.numAttributes()];

    for (int i = 0; i < instances.numAttributes(); ++i) {
        Attribute attribute = instances.attribute(i);
        if (data_point.hasAttribute(attribute.name())
                && !data_point.getAttribute(attribute.name()).toString().equals("?")) {
            switch (attribute.type()) {
            case Attribute.NOMINAL:
                int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString());
                instance[i] = (double) index;
                break;
            case Attribute.NUMERIC:
                // Check if value is really a number.
                try {
                    instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString());
                } catch (NumberFormatException e) {
                    AuToBIUtils.error("Number expected for feature: " + attribute.name());
                }
                break;
            case Attribute.STRING:
                instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString());
                break;
            default:
                AuToBIUtils.error("Unknown attribute type");
            }
        } else {
            instance[i] = Utils.missingValue();
        }
    }

    Instance inst = new DenseInstance(1, instance);
    inst.setDataset(instances);
    return inst;
}

From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License:Open Source License

/**
 * Selects the class attribute of a weka Instances object by name.
 * <p>
 * The attributes are scanned in index order and the first one whose name equals {@code class_attribute} becomes
 * the class attribute. When {@code class_attribute} is {@code null}, or no attribute carries that name, the last
 * attribute is used instead.
 *
 * @param instances       the instances object
 * @param class_attribute the desired class attribute; may be {@code null}
 */
static void setWekaClassAttribute(Instances instances, String class_attribute) {
    if (class_attribute != null) {
        for (int idx = 0; idx < instances.numAttributes(); ++idx) {
            if (class_attribute.equals(instances.attribute(idx).name())) {
                instances.setClassIndex(idx);
                return;
            }
        }
    }

    // No name given, or no attribute with that name: fall back to the last attribute.
    instances.setClassIndex(instances.numAttributes() - 1);
}

From source file:edu.insight.finlaw.multilabel.rough.CreateInstances.java

License:Open Source License

/**
 * Generates a demonstration Instances object with one attribute of each kind (numeric, nominal, string, date and
 * relational), fills it with two rows and prints it to stdout (ARFF format, per the original documentation).
 *
 * @param args   ignored
 * @throws Exception   if generation of instances fails
 */
public static void main(String[] args) throws Exception {
    // ---- 1. attribute declarations ----
    final ArrayList<Attribute> schema = new ArrayList<Attribute>();

    // numeric
    schema.add(new Attribute("att1"));

    // nominal with the labels val1 .. val5
    final ArrayList<String> labels = new ArrayList<String>();
    for (int n = 1; n <= 5; n++) {
        labels.add("val" + n);
    }
    schema.add(new Attribute("att2", labels));

    // string (declared by passing a null list of values)
    schema.add(new Attribute("att3", (ArrayList<String>) null));

    // date
    schema.add(new Attribute("att4", "yyyy-MM-dd"));

    // relational: a nested header with one numeric and one nominal attribute
    final ArrayList<Attribute> nestedSchema = new ArrayList<Attribute>();
    nestedSchema.add(new Attribute("att5.1"));
    final ArrayList<String> nestedLabels = new ArrayList<String>();
    for (int n = 1; n <= 5; n++) {
        nestedLabels.add("val5." + n);
    }
    nestedSchema.add(new Attribute("att5.2", nestedLabels));
    final Instances nestedHeader = new Instances("att5", nestedSchema, 0);
    schema.add(new Attribute("att5", nestedHeader, 0));

    // ---- 2. the (still empty) dataset ----
    final Instances dataset = new Instances("MyRelation", schema, 0);

    // ---- 3. rows ----
    // Every row gets its own value array.
    final double[] firstRow = new double[dataset.numAttributes()];
    firstRow[0] = Math.PI;
    firstRow[1] = labels.indexOf("val3");
    firstRow[2] = dataset.attribute(2).addStringValue("This is a string!");
    firstRow[3] = dataset.attribute(3).parseDate("2001-11-09");
    final Instances firstBag = new Instances(dataset.attribute(4).relation(), 0);
    firstBag.add(new DenseInstance(1.0, new double[] { Math.PI + 1, nestedLabels.indexOf("val5.3") }));
    firstBag.add(new DenseInstance(1.0, new double[] { Math.PI + 2, nestedLabels.indexOf("val5.2") }));
    firstRow[4] = dataset.attribute(4).addRelation(firstBag);
    dataset.add(new DenseInstance(1.0, firstRow));

    final double[] secondRow = new double[dataset.numAttributes()];
    secondRow[0] = Math.E;
    secondRow[1] = labels.indexOf("val1");
    secondRow[2] = dataset.attribute(2).addStringValue("And another one!");
    secondRow[3] = dataset.attribute(3).parseDate("2000-12-01");
    final Instances secondBag = new Instances(dataset.attribute(4).relation(), 0);
    secondBag.add(new DenseInstance(1.0, new double[] { Math.E + 1, nestedLabels.indexOf("val5.4") }));
    secondBag.add(new DenseInstance(1.0, new double[] { Math.E + 2, nestedLabels.indexOf("val5.1") }));
    secondRow[4] = dataset.attribute(4).addRelation(secondBag);
    dataset.add(new DenseInstance(1.0, secondRow));

    // ---- 4. output ----
    System.out.println(dataset);
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License:Open Source License

/**
 * Builds a new dataset by encoding every instance of {@code src} with {@code transform}.
 * <p>
 * The result has one numeric attribute {@code x0, x1, ...} per output dimension of the transform, followed by a
 * copy of the class attribute of {@code src}; that last attribute is set as the class attribute. Each output row
 * carries the encoded feature values, then the class value of the source instance, and keeps its weight.
 *
 * @param src       the dataset to transform; its class attribute must be available
 * @param transform the coordinate transform applied to the features returned by
 *                  {@code WekaUtil.unlabeledFeatures}
 * @return the transformed dataset, named {@code <src relation name>_<transform name>}
 */
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    // Header: x0 .. x(k-1) plus a copy of the source class attribute.
    final ArrayList<Attribute> schema = new ArrayList<Attribute>();
    int dim = 0;
    while (dim < transform.outDimension()) {
        schema.add(new Attribute("x" + dim));
        ++dim;
    }
    schema.add((Attribute) src.classAttribute().copy());

    final String relation = src.relationName() + "_" + transform.name();
    final Instances result = new Instances(relation, schema, 0);

    for (int row = 0; row < src.size(); ++row) {
        final Instance original = src.get(row);
        final RealVector features = new ArrayRealVector(WekaUtil.unlabeledFeatures(original));
        final RealVector encoded = transform.encode(features).x;

        // Encoded coordinates first, class value in the final slot.
        final int n = encoded.getDimension();
        final double[] values = new double[n + 1];
        for (int k = 0; k < n; ++k) {
            values[k] = encoded.getEntry(k);
        }
        values[n] = original.classValue();

        final Instance converted = new DenseInstance(original.weight(), values);
        result.add(converted);
        converted.setDataset(result);
    }

    result.setClassIndex(result.numAttributes() - 1);
    return result;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java

License:Open Source License

/**
 * Constructs and runs an InformationTheoreticMetricLearner from a set of labeled state vector differences.
 * <p>
 * The first {@code numAttributes() - 1} attributes of every instance are read as the difference vector. Vectors
 * whose class value is 0 go to the dissimilar set, all others to the similar set. The bounds {@code ell} and
 * {@code u} are taken from a {@code QuantileAccumulator(0.05, 0.95)} fed with
 * {@code sqrt(inner_prod(diff, A0, diff))} for every vector (sample quantiles, as described in the ITML paper).
 *
 * @param config supplies {@code "itml.gamma"} and the random number generator {@code rng}
 * @param A0     the matrix used in the inner product for the quantile estimates and passed on to the learner
 * @param XL     a labeled set of state vector differences. The label must be the last attribute, and it must be
 *               1 if the states are similar and 0 if they are not.
 * @return the learner, after {@code run()} has been called on it
 */
private static <A> InformationTheoreticMetricLearner learnMetric(final Configuration config,
        final RealMatrix A0, final Instances XL) {
    // Every attribute except the trailing label is a feature.
    final int d = XL.numAttributes() - 1;
    System.out.println("d = " + d);

    final double gamma = config.getDouble("itml.gamma");
    final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95);

    final ArrayList<double[]> similar = new ArrayList<double[]>();
    final ArrayList<double[]> dissimilar = new ArrayList<double[]>();
    for (int row = 0; row < XL.size(); ++row) {
        final Instance inst = XL.get(row);

        final double[] diff = new double[d];
        for (int col = 0; col < d; ++col) {
            diff[col] = inst.value(col);
        }

        final ArrayList<double[]> target = (inst.classValue() == 0.0) ? dissimilar : similar;
        target.add(diff);

        qacc.add(Math.sqrt(HilbertSpace.inner_prod(diff, A0, diff)));
    }

    // Bounds come from the quantile estimates.
    final double ell = qacc.estimates[0];
    final double u = qacc.estimates[1];
    System.out.println("ITML: ell = " + ell);
    System.out.println("ITML: u = " + u);

    final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(similar, dissimilar,
            u, ell, A0, gamma, config.rng);
    itml.run();
    return itml;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java

License:Open Source License

/**
 * Builds and runs a MetricConstrainedKMeans clustering over the labeled points {@code XL} followed by the
 * unlabeled points {@code XU}.
 * <p>
 * Must-link and cannot-link constraints are generated for every pair {@code i < j} of labeled points, depending
 * on whether {@code y.get(i)} equals {@code y.get(j)}; every constraint gets weight 1.0. When
 * {@code with_metric_learning} is set, an InformationTheoreticMetricLearner is run on the same pairs first and its
 * matrix replaces {@code A0} for the clustering.
 * <p>
 * NOTE(review): {@code XL} and {@code y} are neither parameters nor locals of this method; they must resolve to
 * declarations elsewhere in the enclosing class, which is not visible here. Confirm where they come from.
 * {@code instances} is only used for its attribute count.
 *
 * @param config               supplies {@code "cluster.k"}, {@code "itml.gamma"} and the random generator
 *                             {@code rng}
 * @param A0                   initial metric matrix; used for the quantile estimates and, without metric
 *                             learning, for the clustering itself
 * @param instances            dataset whose number of attributes is passed to the clustering as {@code d}
 * @param XU                   unlabeled points, appended after the labeled ones
 * @param with_metric_learning whether to learn the metric with ITML before clustering
 * @return the clustering, after {@code run()} has been called on it
 */
private static <A> MetricConstrainedKMeans makeClustering(final Configuration config, final RealMatrix A0,
        final Instances instances, final ArrayList<RealVector> XU, final boolean with_metric_learning) {
    final int K = config.getInt("cluster.k");
    final int d = instances.numAttributes(); //XL.get( 0 ).getDimension();
    final double u;
    final double ell;
    final double gamma = config.getDouble("itml.gamma");
    // We will set 'ell' and 'u' using sample quantiles as described in
    // the ITML paper.
    final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95);

    // Labeled points come first, so indices 0 .. XL.size()-1 in X refer to labeled points.
    final ArrayList<RealVector> X = new ArrayList<RealVector>();
    X.addAll(XL);
    X.addAll(XU);

    // Must-link and Cannot-link constraints in the form that
    // MetricConstrainedKMeans wants them
    final TIntObjectMap<Pair<int[], double[]>> M = new TIntObjectHashMap<Pair<int[], double[]>>();
    final TIntObjectMap<Pair<int[], double[]>> C = new TIntObjectHashMap<Pair<int[], double[]>>();

    for (int i = 0; i < XL.size(); ++i) {
        // Partners j > i of point i, split by whether their labels agree.
        final TIntList m = new TIntArrayList();
        final TIntList c = new TIntArrayList();
        for (int j = i + 1; j < XL.size(); ++j) {
            if (y.get(i).equals(y.get(j))) {
                m.add(j);
            } else {
                c.add(j);
            }
            // Every labeled pair contributes to the quantile estimates, regardless of its label.
            qacc.add(Math.sqrt(HilbertSpace.inner_prod(XL.get(i), A0, XL.get(j))));
        }
        // Each partner list is stored with a weight of 1.0 per constraint.
        M.put(i, Pair.makePair(m.toArray(), Fn.repeat(1.0, m.size())));
        C.put(i, Pair.makePair(c.toArray(), Fn.repeat(1.0, c.size())));
    }
    // Set bounds to quantile estimates
    ell = qacc.estimates[0];
    u = qacc.estimates[1];
    System.out.println("ITML: ell = " + ell);
    System.out.println("ITML: u = " + u);

    // Similar pairs for MetricLearner
    // (flattened from M as {i, j} index pairs, in the map's key iteration order)
    final ArrayList<int[]> S = new ArrayList<int[]>();
    M.forEachKey(new TIntProcedure() {
        @Override
        public boolean execute(final int i) {
            final Pair<int[], double[]> p = M.get(i);
            if (p != null) {
                for (final int j : p.first) {
                    S.add(new int[] { i, j });
                }
            }
            return true;
        }
    });

    // Dissimilar pairs for MetricLearner
    // (flattened from C in the same way)
    final ArrayList<int[]> D = new ArrayList<int[]>();
    C.forEachKey(new TIntProcedure() {
        @Override
        public boolean execute(final int i) {
            final Pair<int[], double[]> p = C.get(i);
            if (p != null) {
                for (final int j : p.first) {
                    D.add(new int[] { i, j });
                }
            }
            return true;
        }
    });

    // Metric used for clustering: learned by ITML when requested, otherwise A0 unchanged.
    final RealMatrix A;
    if (with_metric_learning) {
        final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(X, S, D, u, ell,
                A0, gamma, config.rng);
        itml.run();
        A = itml.A();
    } else {
        A = A0;
    }

    final MetricConstrainedKMeans kmeans = new MetricConstrainedKMeans(K, d, X, A, M, C, config.rng);
    kmeans.run();
    return kmeans;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java

License:Open Source License

/**
 * Creates a labeled dataset of states pair with optimal actions. Action
 * labels are represented as indexes into an array list. Mappings in both
 * directions are also returned./*from w w w . ja v a 2s. c  o  m*/
 * @param config
 * @param attributes
 * @param data
 * @param labels
 * @param iter
 * @return
 */
private static <A extends VirtualConstructor<A>> SingleInstanceDataset<A> makeSingleInstanceDataset(
        final Configuration config, final ArrayList<Attribute> attributes, final ArrayList<double[]> data,
        final ArrayList<A> labels, final ArrayList<Pair<ArrayList<A>, TDoubleList>> qtable, final int iter) {
    //      System.out.println( "data.size() = " + data.size() );
    final int[] ii = Fn.range(0, data.size());
    Fn.shuffle(config.rng, ii);

    final HashMap<A, Integer> action_to_int = new HashMap<A, Integer>();
    final ArrayList<A> int_to_action = new ArrayList<A>();
    final ArrayList<Pair<ArrayList<A>, TDoubleList>> abridged_qtable = (qtable != null
            ? new ArrayList<Pair<ArrayList<A>, TDoubleList>>()
            : null);

    final TIntArrayList counts = new TIntArrayList();
    final int max_per_label = config.getInt("training.max_per_label");
    final int max_instances = config.getInt("training.max_single");

    final ArrayList<DenseInstance> instance_list = new ArrayList<DenseInstance>();
    for (int i = 0; i < Math.min(data.size(), max_instances); ++i) {
        final int idx = ii[i];
        final A a = labels.get(idx);
        final Integer idx_obj = action_to_int.get(a);
        final int label;
        if (idx_obj == null) {
            //            System.out.println( "\tNew action: " + a );
            label = int_to_action.size();
            int_to_action.add(a);
            action_to_int.put(a, label);
            counts.add(0);
        } else {
            //            System.out.println( "\tRepeat action: " + a );
            label = idx_obj;
        }

        final int c = counts.get(label);
        if (max_per_label <= 0 || c < max_per_label) {
            //            System.out.println( "Adding " + label );
            final double[] phi = Fn.append(data.get(idx), label);
            final DenseInstance instance = new DenseInstance(1.0, phi);
            instance_list.add(instance);
            counts.set(label, c + 1);
            if (qtable != null) {
                abridged_qtable.add(qtable.get(idx));
            }
        }
    }

    final int Nlabels = int_to_action.size();
    final ArrayList<Attribute> labeled_attributes = addLabelToAttributes(attributes, Nlabels);

    final Instances instances = new Instances(deriveDatasetName(config.training_data_single, iter),
            labeled_attributes, counts.sum());
    instances.setClassIndex(instances.numAttributes() - 1);
    for (final DenseInstance instance : instance_list) {
        instances.add(instance);
        instance.setDataset(instances);
    }

    return new SingleInstanceDataset<A>(instances, action_to_int, int_to_action, abridged_qtable);
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java

License:Open Source License

/**
 * Builds a dataset of pairwise instances from a dataset of single instances.
 * <p>
 * Every unordered pair {@code (i, j)} with {@code i < j} is considered. A pair whose two class values are equal
 * is labeled 1 and offered to the positive reservoir, any other pair is labeled 0 and offered to the negative
 * reservoir; the combined instance is only created when the reservoir accepts the next element. The returned
 * dataset contains all negative samples followed by all positive samples.
 *
 * @param rng                    random generator handed to both reservoir samplers
 * @param max_pairwise_instances size argument handed to each reservoir sampler; also part of the dataset name
 * @param single                 the single-instance dataset; class values must be available
 * @param combiner               creates the pair instance and supplies the attributes and name keyword of the
 *                               result
 * @return the pair dataset, with its class index set to the last attribute
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> Instances makePairDataset(
        final RandomGenerator rng, final int max_pairwise_instances, final Instances single,
        final InstanceCombiner combiner) {
    final ReservoirSampleAccumulator<Instance> negative = new ReservoirSampleAccumulator<Instance>(rng,
            max_pairwise_instances);
    final ReservoirSampleAccumulator<Instance> positive = new ReservoirSampleAccumulator<Instance>(rng,
            max_pairwise_instances);

    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);

            // Same class value -> positive pair (label 1), otherwise negative pair (label 0).
            final boolean same_class = ii.classValue() == ij.classValue();
            final int label = same_class ? 1 : 0;
            final ReservoirSampleAccumulator<Instance> reservoir = same_class ? positive : negative;

            // Only build the pair instance when the reservoir is going to take it.
            if (reservoir.acceptNext()) {
                final Instance pair_instance = combiner.apply(ii, ij, label);
                reservoir.addPending(pair_instance);
            }
        }
    }

    final int N = Math.min(negative.samples().size(), positive.samples().size());
    final String dataset_name = "train_" + combiner.keyword() + "_" + max_pairwise_instances;
    final Instances x = new Instances(dataset_name, combiner.attributes(), 2 * N);
    x.setClassIndex(x.numAttributes() - 1);
    for (final Instance sample : negative.samples()) {
        x.add(sample);
    }
    for (final Instance sample : positive.samples()) {
        x.add(sample);
    }

    return x;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java

License:Open Source License

/**
 * Builds a pair dataset from a dataset of single instances, sampling positive and negative pairs separately.
 * <p>
 * Every unordered pair {@code (i, j)} with {@code i < j} is considered. A pair whose two class values are equal
 * is labeled 1 and offered to the positive reservoir, any other pair is labeled 0 and offered to the negative
 * reservoir. The reservoirs are constructed with {@code negative_per_instance * single.size()} and
 * {@code positive_per_instance * single.size()} respectively. The result contains all negative samples followed
 * by all positive samples, and remembers for each row the indices {@code {i, j}} it was built from.
 * <p>
 * NOTE(review): the two classes are sampled independently; nothing here truncates them to equal size. The
 * previous revision computed {@code min(#negative, #positive)} but never used it, so if equal-sized classes
 * were intended that step still has to be implemented.
 *
 * @param rng                   random generator handed to both reservoir samplers
 * @param negative_per_instance multiplied by {@code single.size()} to give the negative reservoir size argument
 * @param positive_per_instance multiplied by {@code single.size()} to give the positive reservoir size argument
 * @param single                the single-instance dataset; class values must be available
 * @param combiner              creates the pair instance and supplies the attributes and name keyword of the
 *                              result
 * @return the pair dataset together with the source indices of every row and the combiner
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makeBalancedPairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);

    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);

            // Same class value -> positive pair (label 1), otherwise negative pair (label 0).
            final boolean same_class = ii.classValue() == ij.classValue();
            final int label = same_class ? 1 : 0;
            final ReservoirSampleAccumulator<Pair<Instance, int[]>> reservoir = same_class ? positive : negative;

            // Only build the pair instance when the reservoir is going to take it.
            if (reservoir.acceptNext()) {
                final Instance pair_instance = combiner.apply(ii, ij, label);
                reservoir.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
            }
        }
    }

    final String dataset_name = "train_" + combiner.keyword() + "_" + Nnegative + "x" + Npositive;
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);

    // matches.get(k) holds the {i, j} source indices of row k of x.
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }

    return new PairDataset(x, matches, combiner);
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java

License:Open Source License

/**
 * Builds a pair dataset from a dataset of single instances, restricted to pairs accepted by a plausibility
 * predicate.
 * <p>
 * Every unordered pair {@code (i, j)} with {@code i < j} is considered; pairs for which {@code plausible_p}
 * returns false are skipped. Of the remaining pairs, one whose two class values are equal is labeled 1 and
 * offered to the positive reservoir, any other is labeled 0 and offered to the negative reservoir. The reservoirs
 * are constructed with {@code negative_per_instance * single.size()} and
 * {@code positive_per_instance * single.size()} respectively. The result contains all negative samples followed
 * by all positive samples, and remembers for each row the indices {@code {i, j}} it was built from. The dataset
 * name records the number of samples actually drawn for each class.
 * <p>
 * NOTE(review): the two classes are sampled independently; nothing here truncates them to equal size. The
 * previous revision computed {@code min(#negative, #positive)} but never used it.
 *
 * @param rng                   random generator handed to both reservoir samplers
 * @param negative_per_instance multiplied by {@code single.size()} to give the negative reservoir size argument
 * @param positive_per_instance multiplied by {@code single.size()} to give the positive reservoir size argument
 * @param single                the single-instance dataset; class values must be available
 * @param combiner              creates the pair instance and supplies the attributes and name keyword of the
 *                              result
 * @param plausible_p           predicate deciding whether two instances may form a pair at all
 * @return the pair dataset together with the source indices of every row and the combiner
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makePlausiblePairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner,
        final Fn.Function2<Boolean, Instance, Instance> plausible_p) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);

    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);

            // Implausible pairs are never offered to either reservoir.
            if (!plausible_p.apply(ii, ij)) {
                continue;
            }

            // Same class value -> positive pair (label 1), otherwise negative pair (label 0).
            final boolean same_class = ii.classValue() == ij.classValue();
            final int label = same_class ? 1 : 0;
            final ReservoirSampleAccumulator<Pair<Instance, int[]>> reservoir = same_class ? positive : negative;

            // Only build the pair instance when the reservoir is going to take it.
            if (reservoir.acceptNext()) {
                final Instance pair_instance = combiner.apply(ii, ij, label);
                reservoir.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
            }
        }
    }

    final String dataset_name = "train_" + combiner.keyword() + "_" + negative.samples().size() + "x"
            + positive.samples().size();
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);

    // matches.get(k) holds the {i, j} source indices of row k of x.
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }

    return new PairDataset(x, matches, combiner);
}