Example usage for weka.core Instances trainCV

Introduction

This page collects usage examples for the weka.core.Instances method trainCV.

Prototype

public Instances trainCV(int numFolds, int numFold)
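
Several examples below also use the three-argument overload, which shuffles the instances with the supplied java.util.Random before building the fold (the Explorer/Experimenter convention noted in comments in the first example):

public Instances trainCV(int numFolds, int numFold, Random random)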

Documentation

Creates the training set for one fold of a cross-validation on the dataset.
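
A minimal, self-contained sketch of the standard Weka cross-validation loop built on trainCV/testCV is shown below. The dataset path and the J48 classifier are placeholders rather than part of any project example that follows:

import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainCVDemo {
    public static void main(String[] args) throws Exception {
        // load an ARFF/CSV dataset ("data.arff" is a placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        int folds = 10;

        // shuffle once, then stratify so each fold keeps the class proportions
        Instances randData = new Instances(data);
        randData.randomize(new Random(42));
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // a single Evaluation object accumulates statistics over all folds
        Evaluation eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n); // every fold except n
            Instances test = randData.testCV(folds, n); // fold n only
            J48 cls = new J48();
            cls.buildClassifier(train);
            eval.evaluateModel(cls, test);
        }
        System.out.println(eval.toSummaryString());
    }
}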

Usage

From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java

private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);

    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);

        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        //           AddClassification filter = new AddClassification();
        //           filter.setClassifier(pClassifier);
        //           filter.setOutputClassification(true);
        //           filter.setOutputDistribution(true);
        //           filter.setOutputErrorFlag(true);
        //           filter.setInputFormat(train);
        //           Filter.useFilter(train, filter); 
        //           Instances pred = Filter.useFilter(test, filter); 
        //           if (predictedData == null)
        //             predictedData = new Instances(pred, 0);
        //           
        //           for (int j = 0; j < pred.numInstances(); j++)
        //             predictedData.add(pred.instance(j));
    }
    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));
    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);

    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));
    pw1.close(); // flush the PrintWriter so the metrics actually reach the file

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}

From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java

License:Open Source License

@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {

    // can't handle empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset.
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        Instances train = dummy.trainCV(nFolds, k);
        Instances test = dummy.testCV(nFolds, k);

        List<WeakLearner> cvLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLeaners.add(copy);
        }

        // train on fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLeaners);

        List<WeakLearner> testLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLeaners.add(copy);
        }

        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLeaners, cv.getContext()));
    }

    // now prepare final result

    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);

    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    //   Job.log(essaySet+"-"+ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);      

    strong.setKappa(cvError);
    return strong;
}

From source file:liac.igmn.evaluation.Evaluator.java

License:Open Source License

public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());

    Instances instances = dataset.getWekaDataset();
    int seed = 1;
    for (int run = 0; run < runs; run++) {
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }

        if (verbose)
            System.out.println("RUN: " + (run + 1));

        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);

            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);

            model.reset();

            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));

            model.train(trainData);

            if (verbose)
                System.out.println("TESTING...");

            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(),
                    dataset.getNumAttributes(), 0, SimpleMatrix.END);
            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);

                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);

                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    confusionMatrix.set(confusionMatrix.divide(runs));
}

From source file:machinelearningcw.EnhancedLinearPerceptron.java

public boolean crossValidation(Instances ins) throws Exception {
    //get the data
    Instances data = new Instances(ins);
    Instances train;// the new training data
    Instances test; // the new testing data

    int seed = 0;
    Random rand = new Random(seed);
    //randomize the data
    data.randomize(rand);

    //number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;

    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);

        // accumulate the test errors for each training scheme
        offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }

        }
        //reset w
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }

    }
    // calculate the mean errors per fold; use double division so that small
    // differences between the two schemes are not lost to integer truncation
    double meanOfflineErrors = offlineErrors / (double) folds;
    double meanOnlineErrors = onlineErrors / (double) folds;
    return meanOfflineErrors > meanOnlineErrors;

}

From source file:mao.datamining.ModelProcess.java

private void testCV(Classifier classifier, Instances finalTrainDataSet, FileOutputStream testCaseSummaryOut,
        TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;
    // randomize data, and then stratify it into 10 groups
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // always run 10-fold cross-validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);
            // count training time
            start = System.currentTimeMillis();
            Classifier j48ClassifierCopy = Classifier.makeCopy(classifier);
            j48ClassifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            //counting test time
            start = System.currentTimeMillis();
            eval.evaluateModel(j48ClassifierCopy, test);
            evalAll.evaluateModel(j48ClassifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }

    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } //end test by cross validation

    // output evaluation
    try {
        ModelProcess.logging("");
        //write into summary file
        testCaseSummaryOut
                .write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write(
                (evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n"))
                        .getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut
                .write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 * @param   h       a multi-output classifier
 * @param   D          the dataset Instances to cross-validate on
 * @param   numFolds number of folds of CV
 * @param   top        Threshold OPtion (pertains to multi-label data only)
 * @param   vop       Verbosity OPtion (which measures do we want to calculate/output)
 * @return   Result   raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D) + "\tL="
                    + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances()
                    + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2)
                    + ":" + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop);
    // Need to reset this because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;

    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;

    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;

            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());

            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");

            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }
    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    if (this.getAE() == null) {
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step,
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);

    result.setClassIndex(compressedLabels.numAttributes());

    return result;
}

From source file:meka.experiment.evaluators.CrossValidation.java

License:Open Source License

/**
 * Returns the evaluation statistics generated for the dataset (sequential execution).
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
protected List<EvaluationStatistics> evaluateSequential(MultiLabelClassifier classifier, Instances dataset) {
    List<EvaluationStatistics> result;
    EvaluationStatistics stats;
    Instances train;
    Instances test;
    Result res;
    int i;
    Random rand;
    MultiLabelClassifier current;

    result = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        log("Fold: " + i);
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, i - 1);
        else
            train = dataset.trainCV(m_NumFolds, i - 1, rand);
        test = dataset.testCV(m_NumFolds, i - 1);
        try {
            current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
            res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
            stats = new EvaluationStatistics(classifier, dataset, res);
            stats.put(KEY_FOLD, i);
            result.add(stats);
        } catch (Exception e) {
            handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                    + Utils.toCommandLine(classifier), e);
            break;
        }

        if (m_Stopped)
            break;
    }

    if (m_Stopped)
        result.clear();

    return result;
}

From source file:meka.experiment.evaluators.CrossValidation.java

License:Open Source License

/**
 * Returns the evaluation statistics generated for the dataset (parallel execution).
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
protected List<EvaluationStatistics> evaluateParallel(final MultiLabelClassifier classifier,
        final Instances dataset) {
    List<EvaluationStatistics> result;
    ArrayList<EvaluatorJob> jobs;
    EvaluatorJob job;
    int i;
    Random rand;

    result = new ArrayList<>();

    debug("pre: create jobs");
    jobs = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        final int index = i;
        final Instances train;
        final Instances test;
        final MultiLabelClassifier current;
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, index - 1);
        else
            train = dataset.trainCV(m_NumFolds, index - 1, rand);
        test = dataset.testCV(m_NumFolds, index - 1);
        current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
        job = new EvaluatorJob() {
            protected List<EvaluationStatistics> doCall() throws Exception {
                List<EvaluationStatistics> result = new ArrayList<>();
                log("Executing fold #" + index + "...");
                try {
                    Result res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
                    EvaluationStatistics stats = new EvaluationStatistics(classifier, dataset, res);
                    stats.put(KEY_FOLD, index);
                    result.add(stats);
                } catch (Exception e) {
                    handleException("Failed to evaluate dataset '" + dataset.relationName()
                            + "' with classifier: " + Utils.toCommandLine(classifier), e);
                }
                log("...finished fold #" + index);
                return result;
            }
        };
        jobs.add(job);
    }
    debug("post: create jobs");

    // execute jobs
    m_Executor = Executors.newFixedThreadPool(m_ActualNumThreads);
    debug("pre: submit");
    try {
        for (i = 0; i < jobs.size(); i++)
            m_Executor.submit(jobs.get(i));
    } catch (RejectedExecutionException e) {
        // ignored
    } catch (Exception e) {
        handleException("Failed to start up jobs", e);
    }
    debug("post: submit");

    debug("pre: shutdown");
    m_Executor.shutdown();
    debug("post: shutdown");

    // wait for threads to finish
    debug("pre: wait");
    while (!m_Executor.isTerminated()) {
        try {
            m_Executor.awaitTermination(100, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // ignored
        } catch (Exception e) {
            handleException("Failed to await termination", e);
        }
    }
    debug("post: wait");

    // collect results
    debug("pre: collect");
    for (i = 0; i < jobs.size(); i++)
        result.addAll(jobs.get(i).getResult());
    debug("post: collect");

    return result;
}

From source file:mlpoc.MLPOC.java

public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        // loads data and set class index
        Instances data = new Instances(br);
        br.close();
        /*File csv=new File(filename);
        CSVLoader loader = new CSVLoader();
        loader.setSource(csv);
        Instances data = loader.getDataSet();*/
        data.setClassIndex(data.numAttributes() - 1);

        // classifier: J48 with options parsed from a command-line style string
        String classname = "weka.classifiers.trees.J48";
        String[] tmpOptions = Utils.splitOptions("-C 0.25");
        Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

        // other options
        int seed = 2;
        int folds = 10;

        // randomize data
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // the above code is used by the StratifiedRemoveFolds filter, the
            // code below by the Explorer/Experimenter:
            // Instances train = randData.trainCV(folds, n, rand);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out
                .println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("InCorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of InCorrectly Classified Instances: " + eval.pctIncorrect());

    } catch (Exception ex) {
        System.err.println(ex.getMessage());
    }
    return eval;
}