Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

On this page you can find example usage of weka.core Instances classIndex.

Prototype


public int classIndex()

Document

Returns the class attribute's index.
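
In the MEKA examples below, the labels occupy the first attributes of the dataset and the class index is set to the number of labels, so classIndex() doubles as L. A minimal sketch (not from the sources below) of reading it back after loading a dataset; the ARFF path is hypothetical, and MLUtils.prepareData is assumed to set the class index from the dataset's -C relation option:

import meka.core.MLUtils;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        Instances D = DataSource.read("data/Music.arff"); // hypothetical path to a MEKA ARFF
        MLUtils.prepareData(D);                           // sets the class index (= number of labels L)
        int L = D.classIndex();                           // in MEKA this doubles as the number of labels
        System.out.println("L = " + L + ", feature attributes = " + (D.numAttributes() - L));
    }
}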

Usage

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * EvaluateModel - Build model 'h' on 'D_train', test it on 'D_test'.
 * Note that raw multi-label predictions returned in Result may not have been thresholded yet.
 * However, data statistics, classifier info, and running times are stored in the Result here.
 * @param   h      a multi-dim. classifier
 * @param   D_train   training data
 * @param   D_test    test data
 * @return   raw prediction data (no evaluation yet)
 */
public static Result evaluateModel(MultiLabelClassifier h, Instances D_train, Instances D_test)
        throws Exception {

    long before = System.currentTimeMillis();
    // Set test data as unlabelled data, if SemisupervisedClassifier
    if (h instanceof SemisupervisedClassifier) {
        ((SemisupervisedClassifier) h).introduceUnlabelledData(MLUtils.setLabelsMissing(new Instances(D_test)));
    }
    // Train
    h.buildClassifier(D_train);
    long after = System.currentTimeMillis();

    //System.out.println(":- Classifier -: "+h.getClass().getName()+": "+Arrays.toString(h.getOptions()));

    // Test
    long before_test = System.currentTimeMillis();
    Result result = testClassifier(h, D_test);
    long after_test = System.currentTimeMillis();

    result.setValue("Number of training instances", D_train.numInstances());
    result.setValue("Number of test instances", D_test.numInstances());
    result.setValue("Label cardinality (train set)", MLUtils.labelCardinality(D_train));
    result.setValue("Label cardinality (test set)", MLUtils.labelCardinality(D_test));

    result.setValue("Build Time", (after - before) / 1000.0);
    result.setValue("Test Time", (after_test - before_test) / 1000.0);
    result.setValue("Total Time", (after_test - before) / 1000.0);

    result.setInfo("Classifier", h.getClass().getName());
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D_train));
    result.setInfo("Number of labels (L)", String.valueOf(D_train.classIndex()));
    //result.setInfo("Maxfreq_set",MLUtils.mostCommonCombination(D_train,result.L));

    String model = h.getModel();
    if (model.length() > 0)
        result.setModel("Model", h.getModel());

    return result;
}
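
A hedged usage sketch for evaluateModel above (not from the page's sources): BR, the ARFF path and the 67/33 split are placeholder choices, and as the javadoc notes, the returned Result holds raw predictions that may still need thresholding.

import meka.classifiers.multilabel.BR;
import meka.classifiers.multilabel.Evaluation;
import meka.core.MLUtils;
import meka.core.Result;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EvaluateModelSketch {
    public static void main(String[] args) throws Exception {
        Instances D = DataSource.read("data/Music.arff"); // hypothetical path to a MEKA ARFF
        MLUtils.prepareData(D);                           // set the class index (= number of labels L)

        int split = (int) (D.numInstances() * 0.67);      // simple 67/33 train/test split
        Instances D_train = new Instances(D, 0, split);
        Instances D_test = new Instances(D, split, D.numInstances() - split);

        BR h = new BR();                                  // Binary Relevance as a placeholder classifier
        Result r = Evaluation.evaluateModel(h, D_train, D_test);
        System.out.println(r);                            // raw predictions plus timing/dataset info
    }
}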

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * TestClassifier - test classifier h on D_test
 * @param   h      a multi-dim. classifier, ALREADY BUILT
 * @param   D_test    test data
 * @return   Result   with raw prediction data ONLY
 */
public static Result testClassifier(MultiLabelClassifier h, Instances D_test) throws Exception {

    int L = D_test.classIndex();
    Result result = new Result(D_test.numInstances(), L);

    if (h.getDebug())
        System.out.print(":- Evaluate ");
    for (int i = 0, c = 0; i < D_test.numInstances(); i++) {

        if (h.getDebug()) {
            int t = i * 50 / D_test.numInstances();
            if (t > c) {
                System.out.print("#");
                c = t;
            }
        }

        // No cheating allowed; clear all class information
        AbstractInstance x = (AbstractInstance) ((AbstractInstance) D_test.instance(i)).copy();
        for (int v = 0; v < D_test.classIndex(); v++)
            x.setValue(v, 0.0);

        // Get and store ranking
        double y[] = h.distributionForInstance(x);
        // Cut off any [no-longer-needed] probabilistic information from MT classifiers.
        if (h instanceof MultiTargetClassifier)
            y = Arrays.copyOf(y, L);

        // Store the result
        result.addResult(y, D_test.instance(i));
    }
    if (h.getDebug())
        System.out.println(":-");

    /*
    if(h.getDebug()) {
            
       for(int i = 0; i < result.size(); i++) {
    System.out.println("\t"+Arrays.toString(result.rowTrue(i))+" vs "+Arrays.toString(result.rowRanking(i)));
       }
    }
    */

    return result;
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * TestClassifierM - test classifier h on D_test, using multiple threads.
 * @param   h      a multi-dim. classifier, ALREADY BUILT (threaded, implements MultiLabelClassifierThreaded)
 * @param   D_test    test data
 * @return   Result   with raw prediction data ONLY
 */
public static Result testClassifierM(MultiLabelClassifier h, Instances D_test) throws Exception {

    int L = D_test.classIndex();
    Result result = new Result(D_test.numInstances(), L);
    if (h.getDebug())
        System.out.print(":- Evaluate ");
    if (h instanceof MultiLabelClassifierThreaded) {
        ((MultiLabelClassifierThreaded) h).setThreaded(true);
        double y[][] = ((MultiLabelClassifierThreaded) h).distributionForInstanceM(D_test);

        for (int i = 0, c = 0; i < D_test.numInstances(); i++) {
            // Store the result
            result.addResult(y[i], D_test.instance(i));
        }
        if (h.getDebug())
            System.out.println(":-");

        /*
        if(h.getDebug()) {
                
           for(int i = 0; i < result.size(); i++) {
              System.out.println("\t"+Arrays.toString(result.rowActual(i))+" vs "+Arrays.toString(result.rowRanking(i)));
           }
                
                
        }
        */
    }
    return result;
}

From source file:meka.classifiers.multilabel.HASEL.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {

    int L = D.classIndex();
    int N = D.numInstances();

    // Get partition from dataset hierarchy
    kMap = SuperLabelUtils.getPartitionFromDatasetHierarchy(D);
    m_M = kMap.length;
    m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, m_M);
    m_InstancesTemplates = new Instances[m_M];

    for (int i = 0; i < m_M; i++) {

        if (getDebug())
            System.out.println("Building model " + (i + 1) + "/" + m_M + ": " + Arrays.toString(kMap[i]));
        Instances D_i = SuperLabelUtils.makePartitionDataset(D, kMap[i]);
        m_Classifiers[i].buildClassifier(D_i);
        m_InstancesTemplates[i] = new Instances(D_i, 0);
    }

}

From source file:meka.classifiers.multilabel.incremental.CCUpdateable.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int L = D.classIndex();

    int indices[] = retrieveChain();
    if (indices == null) {
        indices = MLUtils.gen_indices(L);
        MLUtils.randomize(indices, new Random(m_S));
    }
    if (getDebug())
        System.out.print(":- Chain (");
    root = new ULink(indices, 0, D);
    if (getDebug())
        System.out.println(" ) -:");
}

From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java

License:Open Source License

/**
 * EvaluateModel - Build and evaluate.
 * @param   h         a multi-label Updateable classifier
 * @param   options   dataset options (classifier options should already be set)
 * @return   The evaluation Result
 */
public static Result evaluateModel(MultiLabelClassifier h, String options[]) throws Exception {

    // Load Instances, ...
    Instances D = Evaluation.loadDataset(options);
    MLUtils.prepareData(D);

    // Set the number of windows (batches) @todo move below combining options?
    int nWin = OptionUtils.parse(options, 'x', 10);

    // Set the size of the initial training split (percentage)
    int nInit = OptionUtils.parse(options, "split-percentage", 10);

    // Partially labelled ?
    double rLabeled = OptionUtils.parse(options, "supervision", 1.);

    // Get Threshold
    String Top = OptionUtils.parse(options, "threshold", "0.5");

    // Get Verbosity (do we want to see everything?)
    String Vop = OptionUtils.parse(options, "verbosity", "3");

    if (h.getDebug())
        System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D) + "\tL=" + D.classIndex() + "");

    Utils.checkForRemainingOptions(options);
    return evaluateModelPrequentialBasic(h, D, nWin, rLabeled, Top, Vop);
}

From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java

License:Open Source License

/**
 * EvaluateModelBatchWindow - Evaluate a multi-label data-stream model over windows.
 * @param   h   Multilabel Classifier
 * @param    D   stream
 * @param   numWindows   number of windows
 * @param   rLabeled   labelled-ness (1.0 by default)
 * @param   Top   threshold option
 * @param   Vop   verbosity option
 * @return   The Result on the final window (but it contains samples of all the other evaluated windows).
 * The window is sampled every N/numWindows instances, for a total of numWindows windows.
 */
public static Result evaluateModelBatchWindow(MultiLabelClassifier h, Instances D, int numWindows,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int N = D.numInstances();
    int L = D.classIndex();

    // the Result to use
    Result result = null;
    // the samples of all windows
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    long train_time = 0;
    long test_time = 0;

    int windowSize = (int) Math.floor(D.numInstances() / (double) numWindows);

    if (rLabeled * windowSize < 1.)
        throw new Exception("[Error] The ratio of labelled instances (" + rLabeled
                + ") is too small given the window size!");

    double nth = 1. / rLabeled; // label every nth example

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;
    if (h.getDebug()) {
        System.out.println("Done (in " + (train_time / 1000.0) + " s)");
    }
    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    double t[] = new double[L];
    Arrays.fill(t, 0.5);

    int V = MLUtils.getIntegerOption(Vop, 3);
    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
        System.out.print("#" + Utils.padLeft("w", 6) + " " + Utils.padLeft("n", 6));
        for (String m : measures) {
            System.out.print(" ");
            System.out.print(Utils.padLeft(m, 12));
        }
        System.out.println("");
        System.out.println("--------------------------------------------------------------------------------");
    }

    int i = 0;
    for (int w = 0; w < numWindows - 1; w++) {
        // For each evaluation window ...

        result = new Result(L);
        result.setInfo("Supervision", String.valueOf(rLabeled));
        result.setInfo("Type", "MLi");

        int n = 0;
        test_time = 0;
        train_time = 0;

        for (int c = 0; i < (w * windowSize) + windowSize; i++) {
            // For each instance in the evaluation window ...

            Instance x = D.instance(i);
            AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy 
            // (we can't clear the class values because certain classifiers need to know how well they're doing -- just trust that there's no cheating!)
            //for(int j = 0; j < L; j++)  
            //   x_.setValue(j,0.0);

            if (rLabeled < 0.5 && (i % (int) (1 / rLabeled) == 0)
                    || (rLabeled >= 0.5 && (i % (int) (1. / (1. - rLabeled)) != 0))) {
                // LABELLED - Test & record prediction 
                long before_test = System.currentTimeMillis();
                double y[] = h.distributionForInstance(x_);
                long after_test = System.currentTimeMillis();
                test_time += (after_test - before_test); // was +=
                result.addResult(y, x);
                n++;
            } else {
                // UNLABELLED
                x = MLUtils.setLabelsMissing(x, L);
            }

            // Update the classifier. (The classifier will have to decide if it wants to deal with unlabelled instances.)
            long before = System.currentTimeMillis();
            ((UpdateableClassifier) h).updateClassifier(x);
            long after = System.currentTimeMillis();
            train_time += (after - before); // was +=
        }

        // calculate results
        result.setInfo("Threshold", Arrays.toString(t));
        result.output = Result.getStats(result, Vop);
        result.setMeasurement("Test time", (test_time) / 1000.0);
        result.setMeasurement("Build time", (train_time) / 1000.0);
        result.setMeasurement("Total time", (test_time + train_time) / 1000.0);
        result.setMeasurement("Threshold", (double) t[0]);
        result.setMeasurement("Instances", (double) i);
        result.setMeasurement("Samples", (double) (samples.size() + 1));
        samples.add(result.output);

        // Display results (to CLI)
        if (h.getDebug()) {
            System.out.print("#" + Utils.doubleToString((double) w + 1, 6, 0) + " "
                    + Utils.doubleToString((double) n, 6, 0));
            n = 0;
            for (String m : measures) {
                System.out.print(" ");
                System.out.print(Utils.doubleToString((Double) result.getMeasurement(m), 12, 4));
            }
            System.out.println("");
        }

        // Calibrate threshold for next window
        if (Top.equals("PCutL")) {
            t = ThresholdUtils.calibrateThresholds(result.predictions,
                    MLUtils.labelCardinalities(result.actuals));
        } else {
            Arrays.fill(t, ThresholdUtils.calibrateThreshold(result.predictions,
                    MLUtils.labelCardinality(result.allTrueValues())));
        }

    }

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
    }

    // This is the last Result; prepare it for evaluation output.
    result.setInfo("Classifier", h.getClass().getName());
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);
    result.vals.put("Total instances tested", (double) i);
    result.vals.put("Initial instances for training", (double) windowSize);
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));

    return result;
}

From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java

License:Open Source License

/**
 * Prequential Evaluation - Accuracy since the start of evaluation.
 * @param   h   Multilabel Classifier
 * @param    D   stream
 * @param   windowSize   sampling frequency (of evaluation statistics)
 * @param   rLabeled   labelled-ness (1.0 by default)
 * @param   Top   threshold option
 * @param   Vop   verbosity option
 * The window is sampled every N/numWindows instances, for a total of numWindows windows.
 */
public static Result evaluateModelPrequentialBasic(MultiLabelClassifier h, Instances D, int windowSize,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int L = D.classIndex();

    Result result = new Result();

    long train_time = 0;
    long test_time = 0;

    double nth = 1. / rLabeled; // label every nth example
    result.setInfo("Supervision", String.valueOf(rLabeled));

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window (of size " + windowSize + ") ...");
    }

    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;

    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    if (h.getDebug()) {
        System.out.println(
                "Proceeding to Test/Label/Update cycle on remaining (" + D.numInstances() + ") instances ...");
    }

    result.setInfo("Classifier", h.getClass().getName());
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.setInfo("Verbosity", Vop);
    if (h instanceof MultiTargetClassifier || Evaluation.isMT(D)) {
        result.setInfo("Type", "MT");
    } else {
        result.setInfo("Type", "ML");
        double t = 0.5;
        try {
            t = Double.parseDouble(Top);
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Only a single threshold can be chosen for this kind of evaluation; Using " + t);
        }
        result.setInfo("Threshold", String.valueOf(t));
    }
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    for (int i = 0; i < D.numInstances(); i++) {

        Instance x = D.instance(i);
        AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy 

        /*
         * TEST
         */
        long before_test = System.currentTimeMillis();
        double y[] = h.distributionForInstance(x_);
        long after_test = System.currentTimeMillis();
        test_time += (after_test - before_test);
        result.addResult(y, x);

        /*
         * LABEL BECOMES AVAILABLE ?
         */
        if (rLabeled >= 0.5) {
            x = MLUtils.setLabelsMissing(x, L);
        }

        /*
         * UPDATE
         * (The classifier will have to decide if it wants to deal with unlabelled instances.)
         */
        long before = System.currentTimeMillis();
        ((UpdateableClassifier) h).updateClassifier(x);
        long after = System.currentTimeMillis();
        train_time += (after - before);

        /*
         * RECORD MEASUREMENT
         */
        if (i % windowSize == (windowSize - 1)) {
            HashMap<String, Object> eval_sample = Result.getStats(result, Vop);
            eval_sample.put("Test time", (test_time) / 1000.0);
            eval_sample.put("Build time", (train_time) / 1000.0);
            eval_sample.put("Total time", (test_time + train_time) / 1000.0);
            eval_sample.put("Instances", (double) i);
            eval_sample.put("Samples", (double) (samples.size() + 1));
            samples.add(eval_sample);
            System.out.println("Sample (#" + samples.size() + ") of performance at " + i + "/"
                    + D.numInstances() + " instances.");
        }

    }

    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));

    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);

    return result;
}
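
A hedged usage sketch for evaluateModelPrequentialBasic above (not from the page's sources): BRUpdateable, the ARFF path, the window size of 100, the fully labelled stream (1.0), the "0.5" threshold and verbosity "3" are all placeholder choices.

import meka.classifiers.multilabel.incremental.BRUpdateable;
import meka.classifiers.multilabel.incremental.IncrementalEvaluation;
import meka.core.MLUtils;
import meka.core.Result;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class PrequentialSketch {
    public static void main(String[] args) throws Exception {
        Instances D = DataSource.read("data/Music.arff"); // hypothetical path to a MEKA ARFF
        MLUtils.prepareData(D);                           // set the class index (= number of labels L)

        BRUpdateable h = new BRUpdateable();              // updateable Binary Relevance as a placeholder
        // window size 100, fully labelled stream, fixed 0.5 threshold, verbosity "3"
        Result r = IncrementalEvaluation.evaluateModelPrequentialBasic(h, D, 100, 1.0, "0.5", "3");
        System.out.println(r);
    }
}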

From source file:meka.classifiers.multilabel.incremental.PSUpdateable.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    L = D.classIndex();
    batch = new Instances(D);

    if (batch.numInstances() >= getLimit()) {
        // if we have at least the limit, build!
        if (getDebug())
            System.out.println("Train on instances 0 ... " + batch.numInstances());
        combinations = PSUtils.countCombinationsSparse(batch, L);
        MLUtils.pruneCountHashMap(combinations, m_P);
        // { NEW (we don't want more than m_Support classes!)
        int p = m_P;
        while (combinations.size() > getSupport()) {
            //System.out.println("double prune!");
            m_P++;
            MLUtils.pruneCountHashMap(combinations, m_P);
        }
        super.buildClassifier(batch);
        m_P = p;
        // } NEW
        mlu = null; // We won't be needing the majority set classifier!
    } else {
        // otherwise we don't have enough yet, initialize the collection batch
        if (getDebug())
            System.out.println("Continue collection batch from instance " + batch.numInstances());
        // we will predict the majority labelset until we have a large enough batch
        mlu.buildClassifier(batch);
    }
}

From source file:meka.classifiers.multilabel.LabelTransformationClassifier.java

License:Open Source License

/**
 * Returns a new set of instances either only with the labels (labels = true) or
 * only the features (labels = false)
 *
 * @param inst The input instances.
 * @param labels Return labels (true) or features (false)
 */
protected Instances extractPart(Instances inst, boolean labels) throws Exception {
    //TODO Maybe this already exists somewhere in Meka?

    Remove remove = new Remove();
    remove.setAttributeIndices("first-" + (inst.classIndex()));
    remove.setInvertSelection(labels);
    remove.setInputFormat(inst);
    return Filter.useFilter(inst, remove);
}
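
The same label/feature split can be reproduced outside a LabelTransformationClassifier subclass with Weka's Remove filter; a minimal self-contained sketch (the ARFF path and the helper/class names are hypothetical, not part of MEKA):

import meka.core.MLUtils;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class ExtractPartSketch {

    // Same idea as extractPart() above: keep only the labels (labels = true)
    // or only the features (labels = false).
    static Instances extract(Instances D, boolean labels) throws Exception {
        Remove remove = new Remove();
        remove.setAttributeIndices("first-" + D.classIndex()); // the label block, attributes 1..L
        remove.setInvertSelection(labels);                     // true: keep the label block instead of removing it
        remove.setInputFormat(D);
        return Filter.useFilter(D, remove);
    }

    public static void main(String[] args) throws Exception {
        Instances D = DataSource.read("data/Music.arff"); // hypothetical path to a MEKA ARFF
        MLUtils.prepareData(D);                           // set the class index (= number of labels L)
        Instances Y = extract(D, true);                   // labels only
        Instances X = extract(D, false);                  // features only
        System.out.println("L = " + Y.numAttributes() + ", features = " + X.numAttributes());
    }
}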