Example usage for weka.core Instances testCV

List of usage examples for weka.core Instances testCV

Introduction

On this page you can find example usages of weka.core Instances testCV.

Prototype

public Instances testCV(int numFolds, int numFold)

Document

Creates the test set for one fold of a cross-validation on the dataset.
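
Before the project examples below, here is a minimal, self-contained sketch of how testCV is typically paired with trainCV to produce the train/test split for each fold of a cross-validation. It is not taken from the projects listed on this page; the dataset path "data.arff" and the J48 classifier are placeholders.

import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TestCVExample {
    public static void main(String[] args) throws Exception {
        // load data and set the class attribute (placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        int folds = 10;
        Instances randData = new Instances(data);   // work on a copy
        randData.randomize(new Random(42));         // shuffle with a fixed seed
        if (randData.classAttribute().isNominal()) {
            randData.stratify(folds);               // preserve class distribution per fold
        }

        Evaluation eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n); // all folds except fold n
            Instances test = randData.testCV(folds, n);   // fold n only
            Classifier cls = new J48();
            cls.buildClassifier(train);
            eval.evaluateModel(cls, test);
        }
        System.out.println(eval.toSummaryString("=== " + folds + "-fold cross-validation ===", false));
    }
}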

Usage

From source file:machinelearningcw.EnhancedLinearPerceptron.java

public boolean crossValidation(Instances ins) throws Exception {
    //get the data
    Instances data = new Instances(ins);
    Instances train;// the new training data
    Instances test; // the new testing data

    int seed = 0;
    Random rand = new Random(seed);
    //randomize the data
    data.randomize(rand);

    //number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;

    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);

        //accumulate the total errors for each variant
        offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }

        }
        //reset w
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }

    }
    //  System.out.println(" off: " + offlineErrors);
    //    System.out.println(" on: " + onlineErrors);
    //calculate the mean of the total errors
    offlineErrors = offlineErrors / folds;
    onlineErrors = onlineErrors / folds;
    // System.out.println(flag);
    return offlineErrors > onlineErrors;

}

From source file:machinelearningproject.MachineLearningProject.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    DataSource source = new DataSource("D:\\spambase.arff");
    //        DataSource source = new DataSource("D:\\weather-nominal.arff");
    Instances instances = source.getDataSet();
    int numAttr = instances.numAttributes();
    instances.setClassIndex(instances.numAttributes() - 1);

    int runs = 5;
    int seed = 15;
    for (int i = 0; i < runs; i++) {
        //randomize data
        seed = seed + 1; // the seed for randomizing the data
        Random rand = new Random(seed); // create seeded number generator
        Instances randData = new Instances(instances); // create copy of original data
        Collections.shuffle(randData);

        Evaluation evalDTree = new Evaluation(randData);
        Evaluation evalRF = new Evaluation(randData);
        Evaluation evalSVM = new Evaluation(randData);

        int folds = 10;
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);
            //instantiate classifiers
            DecisionTree dtree = new DecisionTree();
            RandomForest rf = new RandomForest(100);
            SMO svm = new SMO();
            RBFKernel rbfKernel = new RBFKernel();
            double gamma = 0.70;
            rbfKernel.setGamma(gamma);

            dtree.buildClassifier(train);
            rf.buildClassifier(train);
            svm.buildClassifier(train);

            evalDTree.evaluateModel(dtree, test);
            evalRF.evaluateModel(rf, test);
            evalSVM.evaluateModel(svm, test);
        }
        System.out.println("=== Decision Tree Evaluation ===");
        System.out.println(evalDTree.toSummaryString());
        System.out.println(evalDTree.toClassDetailsString());
        System.out.println(evalDTree.toMatrixString());

        System.out.println("=== Random Forest Evaluation ===");
        System.out.println(evalRF.toSummaryString());
        System.out.println(evalRF.toClassDetailsString());
        System.out.println(evalRF.toMatrixString());

        System.out.println("=== SVM Evaluation ===");
        System.out.println(evalSVM.toSummaryString());
        System.out.println(evalSVM.toClassDetailsString());
        System.out.println(evalSVM.toMatrixString());
    }
}

From source file:mao.datamining.ModelProcess.java

private void testCV(Classifier classifier, Instances finalTrainDataSet, FileOutputStream testCaseSummaryOut,
        TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;
    // randomize data, and then stratify it into 10 groups
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        //always run with 10-fold cross-validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);
            //counting training time
            start = System.currentTimeMillis();
            Classifier j48ClassifierCopy = Classifier.makeCopy(classifier);
            j48ClassifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            //counting test time
            start = System.currentTimeMillis();
            eval.evaluateModel(j48ClassifierCopy, test);
            evalAll.evaluateModel(j48ClassifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }

    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } //end test by cross validation

    // output evaluation
    try {
        ModelProcess.logging("");
        //write into summary file
        testCaseSummaryOut
                .write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write(
                (evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n"))
                        .getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut
                .write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 * @param   h       a multi-output classifier
 * @param   D          test data Instances
 * @param   numFolds number of folds of CV
 * @param   top        Threshold OPtion (pertains to multi-label data only)
 * @param   vop       Verbosity OPtion (which measures do we want to calculate/output)
 * @return   Result   raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D) + "\tL="
                    + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances()
                    + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2)
                    + ":" + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop);
    // Need to reset this because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;

    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;

    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
    topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;

            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());

            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");

            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }
    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    if (this.getAE() == null) {
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step,
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);

    result.setClassIndex(compressedLabels.numAttributes());

    return result;
}

From source file:meka.experiment.evaluators.CrossValidation.java

License:Open Source License

/**
 * Returns the evaluation statistics generated for the dataset (sequential execution).
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
protected List<EvaluationStatistics> evaluateSequential(MultiLabelClassifier classifier, Instances dataset) {
    List<EvaluationStatistics> result;
    EvaluationStatistics stats;
    Instances train;
    Instances test;
    Result res;
    int i;
    Random rand;
    MultiLabelClassifier current;

    result = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        log("Fold: " + i);
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, i - 1);
        else
            train = dataset.trainCV(m_NumFolds, i - 1, rand);
        test = dataset.testCV(m_NumFolds, i - 1);
        try {
            current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
            res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
            stats = new EvaluationStatistics(classifier, dataset, res);
            stats.put(KEY_FOLD, i);
            result.add(stats);
        } catch (Exception e) {
            handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                    + Utils.toCommandLine(classifier), e);
            break;
        }

        if (m_Stopped)
            break;
    }

    if (m_Stopped)
        result.clear();

    return result;
}

From source file:meka.experiment.evaluators.CrossValidation.java

License:Open Source License

/**
 * Returns the evaluation statistics generated for the dataset (parallel execution).
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
protected List<EvaluationStatistics> evaluateParallel(final MultiLabelClassifier classifier,
        final Instances dataset) {
    List<EvaluationStatistics> result;
    ArrayList<EvaluatorJob> jobs;
    EvaluatorJob job;
    int i;
    Random rand;

    result = new ArrayList<>();

    debug("pre: create jobs");
    jobs = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        final int index = i;
        final Instances train;
        final Instances test;
        final MultiLabelClassifier current;
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, index - 1);
        else
            train = dataset.trainCV(m_NumFolds, index - 1, rand);
        test = dataset.testCV(m_NumFolds, index - 1);
        current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
        job = new EvaluatorJob() {
            protected List<EvaluationStatistics> doCall() throws Exception {
                List<EvaluationStatistics> result = new ArrayList<>();
                log("Executing fold #" + index + "...");
                try {
                    Result res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
                    EvaluationStatistics stats = new EvaluationStatistics(classifier, dataset, res);
                    stats.put(KEY_FOLD, index);
                    result.add(stats);
                } catch (Exception e) {
                    handleException("Failed to evaluate dataset '" + dataset.relationName()
                            + "' with classifier: " + Utils.toCommandLine(classifier), e);
                }
                log("...finished fold #" + index);
                return result;
            }
        };
        jobs.add(job);
    }
    debug("post: create jobs");

    // execute jobs
    m_Executor = Executors.newFixedThreadPool(m_ActualNumThreads);
    debug("pre: submit");
    try {
        for (i = 0; i < jobs.size(); i++)
            m_Executor.submit(jobs.get(i));
    } catch (RejectedExecutionException e) {
        // ignored
    } catch (Exception e) {
        handleException("Failed to start up jobs", e);
    }
    debug("post: submit");

    debug("pre: shutdown");
    m_Executor.shutdown();
    debug("post: shutdown");

    // wait for threads to finish
    debug("pre: wait");
    while (!m_Executor.isTerminated()) {
        try {
            m_Executor.awaitTermination(100, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // ignored
        } catch (Exception e) {
            handleException("Failed to await termination", e);
        }
    }
    debug("post: wait");

    // collect results
    debug("pre: collect");
    for (i = 0; i < jobs.size(); i++)
        result.addAll(jobs.get(i).getResult());
    debug("post: collect");

    return result;
}

From source file:mlpoc.MLPOC.java

public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        // loads data and set class index
        Instances data = new Instances(br);
        br.close();
        /*File csv=new File(filename);
        CSVLoader loader = new CSVLoader();
        loader.setSource(csv);
        Instances data = loader.getDataSet();*/
        data.setClassIndex(data.numAttributes() - 1);

        // classifier
        String[] tmpOptions;
        String classname = "weka.classifiers.trees.J48 -C 0.25";
        tmpOptions = classname.split(" ");
        classname = "weka.classifiers.trees.J48";
        tmpOptions[0] = "";
        Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

        // other options
        int seed = 2;
        int folds = 10;

        // randomize data
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // the above code is used by the StratifiedRemoveFolds filter, the
            // code below by the Explorer/Experimenter:
            // Instances train = randData.trainCV(folds, n, rand);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out
                .println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("InCorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of InCorrectly Classified Instances: " + eval.pctIncorrect());

    } catch (Exception ex) {
        System.err.println(ex.getMessage());
    }
    return eval;
}

From source file:moa.classifiers.AccuracyWeightedEnsemble.java

License:Open Source License

/**
 * Computes the weight of a candidate classifier.
 * @param candidate Candidate classifier.
 * @param chunk Data chunk of examples.
 * @param numFolds Number of folds in candidate classifier cross-validation.
 * @return Candidate classifier weight.
 */
protected double computeCandidateWeight(Classifier candidate, Instances chunk, int numFolds) {
    double candidateWeight = 0.0;
    Random random = new Random(1);
    Instances randData = new Instances(chunk);
    randData.randomize(random);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(numFolds);
    }

    for (int n = 0; n < numFolds; n++) {
        Instances train = randData.trainCV(numFolds, n, random);
        Instances test = randData.testCV(numFolds, n);

        Classifier learner = candidate.copy();

        for (int num = 0; num < train.numInstances(); num++) {
            learner.trainOnInstance(train.instance(num));
        }

        candidateWeight += computeWeight(learner, test);
    }

    double resultWeight = candidateWeight / numFolds;

    if (Double.isInfinite(resultWeight)) {
        return Double.MAX_VALUE;
    } else {
        return resultWeight;
    }
}

From source file:mulan.data.LabelPowersetStratification.java

License:Open Source License

public MultiLabelInstances[] stratify(MultiLabelInstances data, int folds) {
    try {
        MultiLabelInstances[] segments = new MultiLabelInstances[folds];
        LabelPowersetTransformation transformation = new LabelPowersetTransformation();
        Instances transformed;

        // transform to single-label
        transformed = transformation.transformInstances(data);

        // add id 
        Add add = new Add();
        add.setAttributeIndex("first");
        add.setAttributeName("instanceID");
        add.setInputFormat(transformed);
        transformed = Filter.useFilter(transformed, add);
        for (int i = 0; i < transformed.numInstances(); i++) {
            transformed.instance(i).setValue(0, i);
        }
        transformed.setClassIndex(transformed.numAttributes() - 1);

        // stratify
        transformed.randomize(new Random(seed));
        transformed.stratify(folds);

        for (int i = 0; i < folds; i++) {
            //System.out.println("Fold " + (i + 1) + "/" + folds);
            Instances temp = transformed.testCV(folds, i);
            Instances test = new Instances(data.getDataSet(), 0);
            for (int j = 0; j < temp.numInstances(); j++) {
                test.add(data.getDataSet().instance((int) temp.instance(j).value(0)));
            }
            segments[i] = new MultiLabelInstances(test, data.getLabelsMetaData());
        }
        return segments;
    } catch (Exception ex) {
        Logger.getLogger(LabelPowersetStratification.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}