Example usage for weka.core Instances setClassIndex

List of usage examples for weka.core Instances setClassIndex

Introduction

On this page you can find example usages of weka.core Instances setClassIndex.

Prototype

public void setClassIndex(int classIndex) 

Source Link

Document

Sets the class index of the set.

Usage

From source file:meka.classifiers.multilabel.cc.CNode.java

License:Open Source License

/**
 * Transform./*from   w  w w .  j  a va2s . c o  m*/
 * @param   D      original Instances
 * @param   c      to be the class Attribute
 * @param   pa_c   the parent indices of c
 * @return   new Instances T
 */
public static Instances transform(Instances D, int c, int pa_c[]) throws Exception {
    int L = D.classIndex();
    int keep[] = A.append(pa_c, c); // keep all parents and self!
    Arrays.sort(keep);
    int remv[] = A.invert(keep, L); // i.e., remove the rest < L
    Arrays.sort(remv);
    Instances T = F.remove(new Instances(D), remv, false);
    int map[] = new int[L];
    for (int j = 0; j < L; j++) {
        map[j] = Arrays.binarySearch(keep, j);
    }
    T.setClassIndex(map[c]);
    return T;
}

From source file:meka.classifiers.multilabel.cc.CNode.java

License:Open Source License

/**
 * Main - run some tests./*w  ww.j  a  v a2  s  . c om*/
 */
public static void main(String args[]) throws Exception {
    Instances D = new Instances(new FileReader(args[0]));
    Instance x = D.lastInstance();
    D.remove(D.numInstances() - 1);
    int L = Integer.parseInt(args[1]);
    D.setClassIndex(L);
    double y[] = new double[L];
    Random r = new Random();
    int s[] = new int[] { 1, 0, 2 };
    int PA_J[][] = new int[][] { {}, {}, { 0, 1 }, };

    //MLUtils.randomize(s,r);
    // MUST GO IN TREE ORDER !!
    for (int j : s) {
        int pa_j[] = PA_J[j];
        System.out.println("PARENTS = " + Arrays.toString(pa_j));
        //MLUtils.randomize(pa_j,r);
        System.out.println("**** TRAINING ***");
        CNode n = new CNode(j, null, pa_j);
        n.build(D, new SMO());
        /*
         */
        //Instances D_ = n.transform(D);
        //n.T = D_;
        System.out.println("============== D_" + j + " / class = " + n.T.classIndex() + " =");
        System.out.println("" + n.T);
        System.out.println("**** TESTING ****");
        /*
        Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_);
        for(int pa : pa_j) {
           //System.out.println(""+map[pa]);
           x_.setValue(n.map[pa],y[pa]);
        }
        //x_.setDataset(T);
        x_.setClassMissing();
         */
        //n.T = D_;
        Instance x_ = n.transform(x, y);
        System.out.println("" + x_);
        y[j] = 1;
    }
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * RunExperiment - Build and evaluate a model with command-line options.
 *
 * <p>Loads the dataset named in {@code options}, takes the number of labels from the dataset
 * (via {@code MLUtils.prepareData}) or from {@code -C}, and then either cross-validates
 * ({@code -x}) or evaluates on a train/test split: a separate test file ({@code -T}), or a
 * split of the loaded data by {@code -split-percentage} / {@code -split-number} (default: the
 * first 60% of the instances are used for training). A previously saved model can be loaded
 * with {@code -l}, and the model can be saved with {@code -d}.
 *
 * <p>If only help ({@code -h}) is requested the method returns normally. Failures while
 * loading and preparing the data propagate to the caller. Once the evaluation stage is
 * reached the method does not return: it ends with {@code System.exit(0)}, or, if that stage
 * throws, prints the stack trace and the option list and calls {@code System.exit(1)}.
 *
 * @param   h      multi-label classifier
 * @param   options   command line options
 * @throws  Exception if the options cannot be applied, the dataset cannot be loaded, or the
 *                    number of labels is not specified
 */
public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception {

    // Help
    if (Utils.getOptionPos('h', options) >= 0) {
        System.out.println("\nHelp requested");
        Evaluation.printOptions(h.listOptions());
        return;
    }

    h.setOptions(options);

    if (h.getDebug()) {
        System.out.println("Loading and preparing dataset ...");
    }

    // Load Instances from a file
    Instances D_train = loadDataset(options);

    // Refers to the complete loaded dataset; D_train is re-pointed to a subset further down.
    Instances D_full = D_train;

    // Try extract and set a class index from the @relation name
    MLUtils.prepareData(D_train);

    // Override the number of classes with command-line option (optional)
    if (Utils.getOptionPos('C', options) >= 0) {
        D_train.setClassIndex(Integer.parseInt(Utils.getOption('C', options)));
    }

    // If neither the dataset nor the command line supplied the number of labels,
    // we can't continue.
    int L = D_train.classIndex();
    if (L <= 0) {
        throw new Exception(
                "[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line.");
    }

    // Randomize (Instances)
    int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0;
    if (Utils.getFlag('R', options)) {
        D_train.randomize(new Random(seed));
    }

    // Threaded evaluation? (getFlag is true exactly when the flag is present)
    boolean threaded = Utils.getFlag("Thr", options);

    // Verbosity Option
    String voption = "1";
    if (Utils.getOptionPos("verbosity", options) >= 0) {
        voption = Utils.getOption("verbosity", options);
    }

    // Dump for later?
    String dname = null;
    if (Utils.getOptionPos('d', options) >= 0) {
        dname = Utils.getOption('d', options);
    }

    // Load from file?
    String lname = null;
    Instances dataHeader = null;
    if (Utils.getOptionPos('l', options) >= 0) {
        lname = Utils.getOption('l', options);
        Object[] data = SerializationHelper.readAll(lname);
        h = (MultiLabelClassifier) data[0];
        if (data.length > 1) {
            dataHeader = (Instances) data[1];
        }
    }

    try {

        Result r = null;

        // Threshold Option
        String top = "PCut1"; // default
        if (Utils.getOptionPos("threshold", options) >= 0) {
            top = Utils.getOption("threshold", options);
        }

        if (Utils.getOptionPos('x', options) >= 0) {
            // CROSS-FOLD-VALIDATION

            int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10
            // Check for remaining options
            Utils.checkForRemainingOptions(options);
            r = Evaluation.cvModel(h, D_train, numFolds, top, voption);
            System.out.println(r.toString());
        } else {
            // TRAIN-TEST SPLIT

            Instances D_test = null;

            if (Utils.getOptionPos('T', options) >= 0) {
                // load separate test set
                try {
                    D_test = loadDataset(options, 'T');
                    MLUtils.prepareData(D_test);
                } catch (Exception e) {
                    throw new Exception("[Error] Failed to Load Test Instances from file.", e);
                }
            } else {
                // split training set into train and test sets
                int numTotal = D_train.numInstances();

                // default split
                int N_T = (int) (numTotal * 0.60);
                if (Utils.getOptionPos("split-percentage", options) >= 0) {
                    // split by percentage
                    double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options));
                    N_T = (int) Math.round((numTotal * (percentTrain / 100.0)));
                } else if (Utils.getOptionPos("split-number", options) >= 0) {
                    // split by number
                    N_T = Integer.parseInt(Utils.getOption("split-number", options));
                }

                // Report an impossible split in terms of the user's request, rather than
                // letting the range-copy constructors below fail with a generic message.
                if (N_T < 0 || N_T > numTotal) {
                    throw new Exception("[Error] Invalid train/test split: " + N_T
                            + " training instances requested, but the dataset has " + numTotal
                            + " instances.");
                }

                int N_t = numTotal - N_T;
                D_test = new Instances(D_train, N_T, N_t);
                D_train = new Instances(D_train, 0, N_T);
            }

            // Invert the split?
            if (Utils.getFlag('i', options)) {
                Instances temp = D_test;
                D_test = D_train;
                D_train = temp;
            }

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            if (h.getDebug()) {
                System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L
                        + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)="
                        + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":"
                        + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")");
            }

            if (lname != null) {
                // h is already built, and loaded from a file, test it!
                r = testClassifier(h, D_test);

                String t = top;

                if (top.startsWith("PCut")) {
                    // if PCut is specified we need the training data,
                    // so that we can calibrate the threshold!
                    t = MLEvalUtils.getThreshold(r.predictions, D_train, top);
                }
                r = evaluateModel(h, D_test, t, voption);
            } else {
                // check if train and test set size are > 0
                if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                    if (threaded) {
                        r = evaluateModelM(h, D_train, D_test, top, voption);
                    } else {
                        r = evaluateModel(h, D_train, D_test, top, voption);
                    }
                } else {
                    // otherwise just train on full set. Maybe better throw an exception.
                    h.buildClassifier(D_full);
                }
            }

            // @todo, if D_train==null, assume h is already trained
            if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                System.out.println(r.toString());
            }
        }

        // Save model to file?
        if (dname != null) {
            dataHeader = new Instances(D_train, 0);
            SerializationHelper.writeAll(dname, new Object[] { h, dataHeader });
        }

    } catch (Exception e) {
        e.printStackTrace();
        Evaluation.printOptions(h.listOptions());
        System.exit(1);
    }

    System.exit(0);
}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

/**
 * Transforms a single instance into the layout produced by the label transformation:
 * pseudo-label attributes first, followed by the features of {@code x}.
 *
 * <p>The pseudo-label part is a copy of the first row of {@code compressedTemplateInst}, in
 * which every attribute before that template's class index is marked missing.
 *
 * @param x the instance to transform; it must belong to a dataset
 * @return the single instance of the merged (pseudo-labels + features) dataset, whose class
 *         index is the number of pseudo-label attributes
 * @throws Exception if the feature part cannot be extracted
 */
@Override
public Instance transformInstance(Instance x) throws Exception {

    // Only x itself is needed, so start from a header-only copy of its dataset instead of
    // copying every instance and deleting them again.
    Instances single = new Instances(x.dataset(), 0);
    single.add(x);

    Instances features = this.extractPart(single, false);

    // Same idea for the pseudo labels: header-only copy plus the first template row
    // (add() stores its own copy of the row, so the template itself is left untouched).
    Instances pseudoLabels = new Instances(this.compressedTemplateInst, 0);
    pseudoLabels.add(this.compressedTemplateInst.instance(0));

    Instance placeholder = pseudoLabels.instance(0);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        placeholder.setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(pseudoLabels.numAttributes());

    return newDataSet.instance(0);
}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

/**
 * Compresses the label part of {@code D} with an autoencoder and returns the compressed labels
 * merged in front of the features.
 *
 * <p>If autoencoder optimisation is enabled, a stream of autoencoders is first trained on the
 * labels of a training fold of {@code D}; each one is evaluated on the corresponding test fold
 * through a candidate {@code Maniac}, and the position of the most accurate one is remembered.
 * If no autoencoder has been set on this object, a stream is then trained on the labels of all
 * of {@code D} and the autoencoder at the remembered position (or, failing that, the last of
 * the configured number) is adopted.
 *
 * <p>The compressed labels are also stored in {@code compressedTemplateInst} for use at
 * prediction time.
 *
 * @param D the instances (features and original labels) to transform
 * @return the compressed labels merged with the features; the class index is the number of
 *         compressed label attributes
 * @throws Exception if filtering or evaluation fails
 * @throws IllegalStateException if no autoencoder could be selected from the stream
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;

    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;

    // Not referenced below; kept as in the original.
    // NOTE(review): confirm whether reading this MODULE$ is needed for its initialisation.
    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Mat trainData = this.labelsToDenseMat(wekaStatics, this.extractPart(train, true));

        Iterable<Autoencoder> candidates = this.openAutoencoderStream(autoencoderStatics, trainData);

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : candidates) {
            iteratorcount++;

            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());

            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");

            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }

    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);
    Mat data = this.labelsToDenseMat(wekaStatics, labels);

    if (this.getAE() == null) {
        int itercount = 0;
        for (Autoencoder a : this.openAutoencoderStream(autoencoderStatics, data)) {
            itercount++;
            // take the best one found during optimisation, otherwise the last configured one
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
        if (this.getAE() == null) {
            // Fail with a description instead of a NullPointerException on compress() below.
            throw new IllegalStateException("No autoencoder selected: the stream ended after " + itercount
                    + " autoencoder(s) (wanted position " + topiter + " or "
                    + this.getNumberAutoencoders() + ").");
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step,
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);

    result.setClassIndex(compressedLabels.numAttributes());

    return result;
}

/** Converts label instances to non-sparse form and then to the autoencoder's matrix format. */
private Mat labelsToDenseMat(org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics,
        Instances labels) throws Exception {
    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    return wekaStatics.instancesToMat(aeData);
}

/** Opens the stream of autoencoders trained on {@code data} with this object's settings. */
private Iterable<Autoencoder> openAutoencoderStream(org.kramerlab.autoencoder.package$ autoencoderStatics,
        Mat data) throws Exception {
    return autoencoderStatics.deepAutoencoderStream_java(
            autoencoderStatics.Sigmoid(), // type of neurons. Sigmoid is ok
            this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
            this.getCompression(), // compression from k-th layer to (k+1)-th layer
            data, // training data
            true, // true = L2 Error, false = CrossEntropy
            autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
}

From source file:meka.classifiers.multilabel.meta.MBR.java

License:Open Source License

/**
 * Builds the two stacked BR models: a base model trained on {@code data}, and a meta model
 * trained on {@code data} extended with one extra attribute per label that holds the base
 * model's output for that instance.
 *
 * @param data the training data; its class index is taken as the number of labels
 * @throws Exception if either model cannot be instantiated or built
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int numLabels = data.classIndex();

    // ---- Stage 1: base BR ----
    if (getDebug()) {
        System.out.println("Build BR Base (" + numLabels + " models)");
    }
    String baseType = getClassifier().getClass().getName();
    String[] baseOptions = ((AbstractClassifier) getClassifier()).getOptions();
    m_BASE = (BR) AbstractClassifier.forName(baseType, baseOptions);
    m_BASE.buildClassifier(data);

    // ---- Stage 2: meta BR ----
    if (getDebug()) {
        System.out.println("Prepare Meta data           ");
    }
    Instances metaData = new Instances(data);

    FastVector binaryValues = new FastVector(numLabels);
    binaryValues.addElement("0");
    binaryValues.addElement("1");

    // Every new attribute is inserted at the same position (right after the labels).
    int inserted = 0;
    while (inserted < numLabels) {
        metaData.insertAttributeAt(new Attribute("metaclass" + inserted, binaryValues), numLabels);
        inserted++;
    }

    // Fill the inserted attributes with the base model's output for each training instance.
    for (int row = 0; row < data.numInstances(); row++) {
        double[] baseOutput = m_BASE.distributionForInstance(data.instance(row));
        for (int k = 0; k < baseOutput.length; k++) {
            metaData.instance(row).setValue(numLabels + k, baseOutput[k]);
        }
    }

    metaData.setClassIndex(numLabels);
    m_InstancesTemplate = new Instances(metaData, 0);

    if (getDebug()) {
        System.out.println("Build BR Meta (" + numLabels + " models)");
    }

    String metaType = getClassifier().getClass().getName();
    String[] metaOptions = ((AbstractClassifier) getClassifier()).getOptions();
    m_META = (BR) AbstractClassifier.forName(metaType, metaOptions);
    m_META.buildClassifier(metaData);
}

From source file:meka.classifiers.multilabel.meta.RandomSubspaceML.java

License:Open Source License

/**
 * Trains {@code m_NumIterations} copies of the base classifier, each on a random sample of
 * the instances and with a random selection of the non-label attributes removed.
 *
 * <p>{@code D} is shuffled on every iteration. For each member the attribute indices that
 * remain are stored in {@code m_IndicesCut}, and a template instance plus an empty template
 * dataset of the reduced data are stored for later use.
 *
 * @param D the training data; its class index is taken as the number of labels
 * @throws Exception if copying or building a classifier fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    final boolean debug = getDebug();

    m_InstancesTemplates = new Instances[m_NumIterations];
    m_InstanceTemplates = new Instance[m_NumIterations];

    if (debug) {
        System.out.println("-: Models: ");
    }

    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    Random rng = new Random(m_Seed);

    int bagSize = (D.numInstances() * m_BagSizePercent / 100);

    int numLabels = D.classIndex();
    int numFeatures = D.numAttributes() - numLabels;
    int keptFeatures = numFeatures * m_AttSizePercent / 100;
    m_IndicesCut = new int[m_NumIterations][];

    for (int member = 0; member < m_NumIterations; member++) {

        // --- sample the instance space (exactly like in EnsembleML.java) ---
        if (debug) {
            System.out.print("\t" + (member + 1) + ": ");
        }
        D.randomize(rng);
        Instances subset = new Instances(D, 0, bagSize);
        if (debug) {
            System.out.print("N=" + D.numInstances() + " -> N'=" + subset.numInstances() + ", ");
        }

        // --- sample the attribute space ---
        // The class index is unset while attributes are removed and restored afterwards.
        subset.setClassIndex(-1);
        int[] dropped = A.make_sequence(numLabels, numFeatures + numLabels);
        A.shuffle(dropped, rng);
        dropped = Arrays.copyOfRange(dropped, 0, numFeatures - keptFeatures);
        Arrays.sort(dropped);
        m_IndicesCut[member] = A.invert(dropped, D.numAttributes());
        subset = F.remove(subset, dropped, false);
        subset.setClassIndex(numLabels);
        if (debug) {
            int[] kept = m_IndicesCut[member];
            System.out.print(" A:=" + numFeatures + " -> A'=" + (subset.numAttributes() - numLabels) + " ("
                    + kept[numLabels] + ",...," + kept[kept.length - 1] + ")");
        }

        // --- train the multi-label classifier ---
        if (m_Classifiers[member] instanceof Randomizable) {
            ((Randomizable) m_Classifiers[member]).setSeed(m_Seed + member);
        }
        if (debug) {
            System.out.println(".");
        }

        m_Classifiers[member].buildClassifier(subset);
        // NOTE(review): the template is the instance at index 1, so the sample needs at least
        // two instances — confirm whether index 0 was intended.
        m_InstanceTemplates[member] = subset.instance(1);
        m_InstancesTemplates[member] = new Instances(subset, 0);
    }

    if (debug) {
        System.out.println(":-");
    }
}

From source file:meka.classifiers.multilabel.MLCBMaD.java

License:Open Source License

/**
 * Transforms a single instance into the layout produced by the label transformation:
 * pseudo-label attributes first, followed by the features of {@code x}.
 *
 * <p>The pseudo-label part is a copy of the first row of {@code compressedMatrix}, in which
 * every attribute before that matrix's class index is marked missing.
 *
 * @param x the instance to transform; it must belong to a dataset
 * @return the single instance of the merged (pseudo-labels + features) dataset, whose class
 *         index is {@code size}
 * @throws Exception if the feature part cannot be extracted
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
    // Only x itself is needed, so start from a header-only copy of its dataset instead of
    // copying every instance and deleting them again.
    Instances single = new Instances(x.dataset(), 0);
    single.add(x);

    Instances features = this.extractPart(single, false);

    // Same idea for the pseudo labels: header-only copy plus the first row of the compressed
    // matrix (add() stores its own copy of the row, so the matrix itself is left untouched).
    Instances pseudoLabels = new Instances(this.compressedMatrix, 0);
    pseudoLabels.add(this.compressedMatrix.instance(0));

    Instance placeholder = pseudoLabels.instance(0);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        placeholder.setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(this.size);

    return newDataSet.instance(0);
}

From source file:meka.classifiers.multilabel.MLCBMaD.java

License:Open Source License

/**
 * Replaces the labels of {@code D} by the first factor of a Boolean matrix decomposition of
 * the label part, and remembers both factors for later use.
 *
 * @param D the instances (features and original labels) to transform
 * @return the first factor merged in front of the features, with the class index set to the
 *         configured size
 * @throws Exception if extraction or decomposition fails
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    Instances featurePart = this.extractPart(D, false);
    Instances labelPart = this.extractPart(D, true);

    // Decompose the label matrix into two factors.
    BooleanMatrixDecomposition decomposer = BooleanMatrixDecomposition.BEST_CONFIGURED(this.threshold);
    Tuple<Instances, Instances> factors = decomposer.decompose(labelPart, this.size);
    this.compressedMatrix = factors._1;
    this.uppermatrix = factors._2;

    Instances merged = Instances.mergeInstances(this.compressedMatrix, featurePart);
    merged.setClassIndex(this.getSize());
    return merged;
}

From source file:meka.classifiers.multilabel.PLST.java

License:Open Source License

/**
 * The method to transform the labels into another set of latent labels,
 * typically a compression method is used, e.g., Boolean matrix decomposition
 * in the case of MLC-BMaD, or matrix multiplication based on SVD for PLST.
 *
 * <p>Steps performed here: recode the label matrix to 1/-1, subtract the per-label mean
 * (stored in {@code m_Shift}), take the SVD of the result, keep the first {@code getSize()}
 * columns of V (stored in {@code m_v}), and multiply a freshly converted label matrix by
 * them. An empty pattern dataset for the compressed labels is stored in
 * {@code m_PatternInstances}.
 *
 * @param D the instances to transform into new instances with transformed labels. The
 * Instances consist of features and original labels.
 * @return The resulting instances. Instances consist of features and transformed labels.
 * @throws Exception if extraction or conversion fails
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    Instances featurePart = this.extractPart(D, false);
    Instances labelPart = this.extractPart(D, true);
    int numLabels = labelPart.numAttributes();

    Matrix signedLabels = MatrixUtils.instancesToMatrix(labelPart);

    // Preprocessing as in the original implementation: the algorithm needs 1/-1 coding, so
    // every entry that is not 1 becomes -1 while the per-label mean is accumulated.
    double[] labelMeans = new double[numLabels];
    for (int label = 0; label < numLabels; label++) {
        double[] values = labelPart.attributeToDoubleArray(label);
        double total = 0.0;
        for (int row = 0; row < values.length; row++) {
            if (values[row] == 1.0) {
                total += 1.0;
            } else {
                total -= 1.0;
                signedLabels.set(row, label, -1.0);
            }
        }
        labelMeans[label] = total / values.length;
    }

    // remember shift for prediction (a single row holding the means)
    this.m_Shift = new Matrix(new double[][] { labelMeans });

    // one row of means per training instance, so it can be subtracted from the label matrix
    double[][] repeatedMeans = new double[labelPart.numInstances()][];
    for (int row = 0; row < repeatedMeans.length; row++) {
        repeatedMeans[row] = labelMeans;
    }
    Matrix trainShift = new Matrix(repeatedMeans);

    SingularValueDecomposition svd = new SingularValueDecomposition(signedLabels.minus(trainShift));

    // The paper uses U here, but the implementation by the authors uses V, so
    // we used V here too.
    m_v = svd.getV();

    // keep only the leading getSize() columns of V
    double[][] fullV = m_v.getArray();
    double[][] leadingColumns = new double[m_v.getRowDimension()][this.getSize()];
    for (int row = 0; row < leadingColumns.length; row++) {
        System.arraycopy(fullV[row], 0, leadingColumns[row], 0, leadingColumns[row].length);
    }
    m_v = new Matrix(leadingColumns);

    // now the multiplication (last step of the algorithm); the label matrix is converted
    // afresh from the instances here rather than reusing the recoded one
    Matrix compressed = MatrixUtils.instancesToMatrix(labelPart).times(this.m_v);

    // and transform it to Instances: one numeric attribute per compressed column
    ArrayList<Attribute> attinfos = new ArrayList<Attribute>();
    for (int col = 0; col < compressed.getColumnDimension(); col++) {
        attinfos.add(new Attribute("att" + col));
    }

    // create pattern instances (also used in prediction) note: this is a regression
    // problem now, labels are not binary
    this.m_PatternInstances = new Instances("compressedlabels", attinfos, compressed.getRowDimension());

    // fill result Instances
    Instances compressedLabels = MatrixUtils.matrixToInstances(compressed, m_PatternInstances);
    Instances result = Instances.mergeInstances(compressedLabels, featurePart);

    result.setClassIndex(this.getSize());
    return result;
}