List of usage examples for weka.core Instances classIndex
public int classIndex()
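Before the examples, here is a minimal, self-contained sketch of how classIndex() behaves (assuming Weka 3.7+; the relation name, attribute names, and class ClassIndexDemo below are made up for illustration): it returns -1 until a class attribute has been assigned, after which it returns the zero-based index of that attribute.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class ClassIndexDemo {
    public static void main(String[] args) {
        // two numeric features plus a nominal "label" attribute (names are hypothetical)
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        attrs.add(new Attribute("label", labels));

        Instances data = new Instances("demo", attrs, 0);
        System.out.println(data.classIndex());        // -1: no class attribute set yet

        // common idiom: treat the last attribute as the class
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.classIndex());        // 2
    }
}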
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Pair<ArrayList<double[]>, int[]> splitLabels(final Instances train) {
    assert (train.classAttribute() != null);
    final ArrayList<double[]> X = new ArrayList<double[]>();
    final int[] Y = new int[train.size()];
    for (int i = 0; i < train.size(); ++i) {
        final Instance inst = train.get(i);
        final double[] x = new double[train.numAttributes() - 1];
        int idx = 0;
        for (int j = 0; j < train.numAttributes(); ++j) {
            if (j == train.classIndex()) {
                Y[i] = (int) inst.classValue();
            } else {
                x[idx++] = inst.value(j);
            }
        }
        X.add(x);
    }
    return Pair.makePair(X, Y);
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Instances allPairwiseProducts(final Instances single, final boolean reflexive,
        final boolean symmetric) {
    final int c = single.classIndex();
    System.out.println("Class attribute = " + c);

    final ArrayList<Attribute> pair_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < single.numAttributes(); ++i) {
        if (i == c) {
            continue;
        }
        final Attribute ai = single.attribute(i);
        final int j0 = (symmetric ? 0 : i);
        for (int j = j0; j < single.numAttributes(); ++j) {
            if (j == c) {
                continue;
            }
            if (!reflexive && i == j) {
                continue;
            }
            final Attribute aj = single.attribute(j);
            final String name = ai.name() + "_x_" + aj.name();
            pair_attributes.add(new Attribute(name));
        }
    }

    String pair_name = single.relationName();
    pair_name += "_x";
    if (reflexive) {
        pair_name += "r";
    }
    if (symmetric) {
        pair_name += "s";
    }
    pair_name += "_";
    pair_name += single.relationName();

    final Instances result = new Instances(pair_name, pair_attributes, 0);
    for (final Instance inst : single) {
        final double[] xp = new double[pair_attributes.size()];
        int idx = 0;
        for (int i = 0; i < single.numAttributes(); ++i) {
            if (i == c) {
                continue;
            }
            final double xi = inst.value(i);
            final int j0 = (symmetric ? 0 : i);
            for (int j = j0; j < single.numAttributes(); ++j) {
                if (j == c) {
                    continue;
                }
                if (!reflexive && i == j) {
                    continue;
                }
                final double xj = inst.value(j);
                xp[idx++] = xi * xj;
            }
        }
        WekaUtil.addInstance(result, new DenseInstance(inst.weight(), xp));
    }
    return result;
}
From source file:edu.oregonstate.eecs.mcplan.ml.Memorizer.java
License:Open Source License
@Override
public void buildClassifier(final Instances data) throws Exception {
    Nclasses_ = data.numClasses();
    class_idx_ = data.classIndex();
    for (final Instance i : data) {
        final double[] x = i.toDoubleArray();
        final int c = (int) x[class_idx_];
        x[class_idx_] = 0;
        m_.put(new ArrayHolder(x), c);
    }
}
From source file:edu.oregonstate.eecs.mcplan.ml.WekaGlue.java
License:Open Source License
public static SequentialProjectionHashLearner createSequentialProjectionHashLearner(final RandomGenerator rng,
        final Instances labeled, final Instances unlabeled, final int K, final double eta, final double alpha) {
    assert (labeled.classIndex() >= 0);
    final int Nfeatures = labeled.numAttributes() - 1;

    final RealMatrix X = new Array2DRowRealMatrix(Nfeatures, labeled.size() + unlabeled.size());
    final RealMatrix XL = new Array2DRowRealMatrix(Nfeatures, labeled.size() * 2);
    final RealMatrix S = new Array2DRowRealMatrix(XL.getColumnDimension(), XL.getColumnDimension());

    for (int j = 0; j < labeled.size(); ++j) {
        final Instance inst = labeled.get(j);
        for (int i = 0; i < XL.getRowDimension(); ++i) {
            X.setEntry(i, j, inst.value(i));
            XL.setEntry(i, j, inst.value(i));
        }

        int sj = -1;
        Instance s = null;
        do {
            sj = rng.nextInt(labeled.size());
            s = labeled.get(sj);
        } while (s == inst || s.classValue() != inst.classValue());
        S.setEntry(j, sj, 1);

        int dj = -1;
        Instance d = null;
        do {
            dj = rng.nextInt(labeled.size());
            d = labeled.get(dj);
        } while (d == inst || d.classValue() == inst.classValue());
        S.setEntry(j, dj, -1);
    }

    for (int j = 0; j < unlabeled.size(); ++j) {
        final Instance inst = unlabeled.get(j);
        for (int i = 0; i < X.getRowDimension(); ++i) {
            X.setEntry(i, labeled.size() + j, inst.value(i));
        }
    }

    return new SequentialProjectionHashLearner(X, XL, S, K, eta, alpha);
}
From source file:edu.uga.cs.fluxbuster.classification.Classifier.java
License:Open Source License
/**
 * Executes the classifier.
 *
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify
 * @return a map of the classified clusters, the keys are the classes
 *         and the values are lists of cluster id's belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            ClusterClass clusClass = ClusterClass
                    .valueOf(data.classAttribute().value((int) pred).toUpperCase());
            if (!retval.containsKey(clusClass)) {
                retval.put(clusClass, new ArrayList<StoredDomainCluster>());
            }
            retval.get(clusClass).add(clusters.get(i));
        }
    } catch (Exception e) {
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts the instances in the given dataset to binary, setting the specified label to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts a set of instances to svm-light format
 * @param data the weka instances
 * @return the weka instances in svm-light format
 */
public static String arffToSVMLight(Instances data, SVMLightLabelFormat labelFormat) {

    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) {
        throw new IllegalArgumentException(
                "SVM-light classification label format requires that the data contain only two classes.");
    }

    String str = "";
    String endline = System.getProperty("line.separator");

    int numInstances = data.numInstances();
    int numAttributes = data.numAttributes();
    int classAttIdx = data.classIndex();

    for (int instIdx = 0; instIdx < numInstances; instIdx++) {

        Instance inst = data.instance(instIdx);

        // convert the instance label
        if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) {
            str += (inst.classValue() == 0) ? "-1" : "1";
        } else {
            str += inst.classValue();
        }
        str += " ";

        // convert each feature
        for (int attIdx = 0; attIdx < numAttributes; attIdx++) {
            // skip the class attribute
            if (attIdx == classAttIdx)
                continue;
            str += (attIdx + 1) + ":" + inst.value(attIdx) + " ";
        }

        // append the instance info string
        str += "# " + instIdx;
        str += endline;
    }

    return str;
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Returns the max dimensions of a subdataset for a given positive rate (total, p, n).
 * @param originalDataset
 * @param positiveExamplePercentProportion
 * @return
 */
public SubdatasetDimensions calculateSubdatasetDimensionsForProportion(Instances originalDataset,
        BigDecimal positiveExamplePercentProportion) {

    // size of subdataset, initialized to original size
    int total = originalDataset.numInstances();
    // number of positive instances
    int p = 0;
    // number of negative instances
    int n = 0;
    // current PR
    int pp = 0;

    // count positives
    for (int i = 0; i < total; i++) {
        if (originalDataset.instance(i).stringValue(originalDataset.classIndex()).equals(Settings.buggyLabel)) {
            p++;
        }
    }
    n = total - p;

    // finds actual PR
    pp = calculatePositivePercentCeil(p + n, p);

    if (verbose)
        System.out.println(
                "[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] current: p=" + p
                        + " n=" + n + " pp = " + pp);

    // if current PR equals the desired one, return current dimensions
    if (pp == positiveExamplePercentProportion.intValue())
        return new SubdatasetDimensions(p, n);

    // if current PR is greater than the desired one,
    // decrements p until the ceiling of the current PR is no longer greater than the desired one
    if (pp > positiveExamplePercentProportion.intValue()) {
        while (pp > positiveExamplePercentProportion.intValue()) {
            p--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                        + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired one than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + 1 + n, p + 1), pp,
                positiveExamplePercentProportion.intValue())) {
            p++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }

    // if current PR is less than the desired one,
    // decrements n until the ceiling of the current PR is no longer less than the desired one
    if (pp < positiveExamplePercentProportion.intValue()) {
        while (pp < positiveExamplePercentProportion.intValue()) {
            n--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                        + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired one than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + n + 1, p), pp,
                positiveExamplePercentProportion.intValue())) {
            n++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] final p="
                + p + " n=" + n + " pp = " + pp);

    return new SubdatasetDimensions(p, n);
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Called by generateResampledSubdataset.
 *
 * @param originalDataset
 * @param subdatasetDimensions
 * @return
 */
private Instances generateResampledSubdataset(Instances originalDataset,
        SubdatasetDimensions subdatasetDimensions) {

    // creates an empty dataset
    Instances resampledSubdataset = new Instances(originalDataset);
    resampledSubdataset.delete();

    // randomize dataset instances order
    originalDataset.randomize(RandomizationManager.randomGenerator);

    // calc number of positives to insert
    int positivesToInsert = subdatasetDimensions.getP();
    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = "
                + positivesToInsert);

    // calc number of negatives to insert
    int negativesToInsert = subdatasetDimensions.getN();

    // iterates over the original dataset instances
    for (int i = 0; i < originalDataset.numInstances(); i++) {
        // if instance is positive and more are needed in the new dataset, inserts into new dataset
        if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.buggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            positivesToInsert--;
        }

        // if instance is negative and more are needed in the new dataset, inserts into new dataset
        if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            negativesToInsert--;
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling complete: "
                + this.printDatasetInfo(resampledSubdataset));

    return resampledSubdataset;
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Prints the number of positive and negative instances and their respective percentages.
 * @param dataset
 * @return
 */
public String printDatasetInfo(Instances dataset) {

    int positives = 0;
    int negatives = 0;

    for (int i = 0; i < dataset.numInstances(); i++) {
        if (dataset.instance(i).stringValue(dataset.classIndex()).equals(Settings.buggyLabel)) {
            positives++;
        }
        if (dataset.instance(i).stringValue(dataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            negatives++;
        }
    }

    double percent = ((double) positives / (double) dataset.numInstances()) * 100;
    return new String("total instances: " + dataset.numInstances() + ", p+n=" + (positives + negatives)
            + ", p: " + positives + ", n: " + negatives + ", %p : " + percent);
}