Example usage for weka.core Instances testCV

List of usage examples for weka.core Instances testCV

Introduction

On this page you can find usage examples for weka.core Instances testCV.

Prototype



public Instances testCV(int numFolds, int numFold) 

Document

Creates the test set for one fold of a cross-validation on the dataset.
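
The companion method trainCV(numFolds, numFold) returns the remaining folds as the training set. Note that testCV has no overload taking a Random; any shuffling has to be applied to the dataset before splitting. A minimal sketch of a stratified cross-validation loop, assuming data is an Instances object that is already loaded and has its class attribute set:

int folds = 10;
Random rand = new Random(1);
// work on a copy so the original instance order is preserved
Instances randData = new Instances(data);
randData.randomize(rand);
if (randData.classAttribute().isNominal()) {
    // keep the class distribution similar across folds
    randData.stratify(folds);
}
for (int n = 0; n < folds; n++) {
    Instances train = randData.trainCV(folds, n, rand); // all folds except fold n
    Instances test = randData.testCV(folds, n); // fold n only
    // build and evaluate a classifier on train/test here
}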

Usage

From source file:entity.NfoldCrossValidationManager.java

License:Open Source License

/**
 * n-fold cross-validation with noise (independent fp and fn rates)
 *
 * @param classifier the classifier to evaluate
 * @param dataset the dataset to cross-validate on
 * @param folds the number of folds
 * @param fpPercentage the false-positive noise level injected into each training fold
 * @param fnPercentage the false-negative noise level injected into each training fold
 * @return evaluation statistics for the run
 */
public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal fpPercentage, BigDecimal fnPercentage) {

    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();

    // randomizes order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);

    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }
    for (int n = 0; n < folds; n++) {
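        // fold n becomes the test set; the remaining folds form the training set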
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);

        // copy the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the configured noise levels into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, fpPercentage, fnPercentage);

        // build and evaluate classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // trains the model using a noisy train set
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    // collect the evaluation metrics for the n-fold cross-validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision,
            recall, fmeasure);
}

From source file:entity.NfoldCrossValidationManager.java

License:Open Source License

/**
 * n-fold cross-validation with noise (combined fp and fn rate)
 *
 * @param classifier the classifier to evaluate
 * @param dataset the dataset to cross-validate on
 * @param folds the number of folds
 * @param combinedFpFnPercentage the combined false-positive/false-negative noise level injected into each training fold
 * @return evaluation statistics for the run
 */

public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal combinedFpFnPercentage) {

    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();

    // randomizes order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);

    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }
    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);

        // copy the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the configured noise level into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, combinedFpFnPercentage);

        // build and evaluate classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // trains the model using a noisy train set
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    // collect the evaluation metrics for the n-fold cross-validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision,
            recall, fmeasure);
}

From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java

License:Open Source License

/**
 * Splits the dataset into test, central, and per-agent training sets for each fold.
 *
 * @param folds the number of cross-validation folds
 * @param minAgents the minimum number of agents to generate datasets for
 * @param maxAgents the maximum number of agents to generate datasets for
 * @param originalDatasetPath path to the source CSV dataset
 * @param outputDir directory where the split datasets are written
 * @param scenario scenario name recorded in the experiment info file
 * @param logger logger for progress messages
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath, String outputDir,
        String scenario, Logger logger) {

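    // test-set ratio per fold (1/folds), truncated to two decimal places for use in the output path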
    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {

            Instances originalData = this.getDataFromCSV(originalDatasetPath);

            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(
                                outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio, scenario,
                        logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
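                // header-only copy of trainData, so each agent ARFF file starts with the correct attribute declarations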
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }

                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }
                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

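                // deal the training instances to the agents round-robin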
                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }

        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }

        logger.finest("Dataset for fold " + fold + " created.");
    }

    logger.finer("<-- splitDataset()");

}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier on a set of instances.
 *
 * @param classifier the classifier with any options set.
 * @param data the data on which the cross-validation is to be
 * performed
 * @param numFolds the number of folds for the cross-validation
 * @param random random number generator for randomization
 * @exception Exception if a classifier could not be generated
 * successfully or the class is not defined
 */
public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random)
        throws Exception {

    // Make a copy of the data we can reorder
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(numFolds);
    }
    // Do the folds
    for (int i = 0; i < numFolds; i++) {
        Instances train = data.trainCV(numFolds, i, random);
        setPriors(train);
        Classifier copiedClassifier = Classifier.makeCopy(classifier);
        copiedClassifier.buildClassifier(train);
        Instances test = data.testCV(numFolds, i);
        evaluateModel(copiedClassifier, test);
    }
    m_NumFolds = numFolds;
}

From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java

License:Open Source License

/**
 * Load instances into the global train store and create test set.
 *
 * @param filename
 *            the .arff filename to be used
 * @param generateTestSet
 *            true if a test set is going to be generated
 * @throws IOException
 *             if the input file is not found
 */
public final void loadInstances(final String filename, final boolean generateTestSet) throws IOException {
    // Open .arff
    final Instances set = InstancesUtility.openInstance(filename);
    if (set.classIndex() < 0) {
        set.setClassIndex(set.numAttributes() - 1);
    }
    set.randomize(new Random());

    if (generateTestSet) {
        final int numOfFolds = (int) SettingsLoader.getNumericSetting("NumberOfFolds", 10);
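        // pick one fold at random: testCV returns that fold, trainCV the rest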
        final int fold = (int) Math.floor(Math.random() * numOfFolds);
        trainSet = set.trainCV(numOfFolds, fold);
        testSet = set.testCV(numOfFolds, fold);
    } else {
        trainSet = set;
    }

    myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet);
    myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet);

}

From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java

private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    // number of cross-validation folds
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);

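    // index of the "TRUE" value of the isBuggy class attribute, resolved inside the fold loop below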
    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the calls above match what the StratifiedRemoveFolds filter does; the
        // Explorer/Experimenter would pass the Random to trainCV as well:
        // Instances train = randData.trainCV(folds, n, rand);

        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        //           AddClassification filter = new AddClassification();
        //           filter.setClassifier(pClassifier);
        //           filter.setOutputClassification(true);
        //           filter.setOutputDistribution(true);
        //           filter.setOutputErrorFlag(true);
        //           filter.setInputFormat(train);
        //           Filter.useFilter(train, filter); 
        //           Instances pred = Filter.useFilter(test, filter); 
        //           if (predictedData == null)
        //             predictedData = new Instances(pred, 0);
        //           
        //           for (int j = 0; j < pred.numInstances(); j++)
        //             predictedData.add(pred.instance(j));
    }
    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));
    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);

    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));
    // close the writer so the metrics are actually flushed to predictors.csv
    pw1.close();

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}

From source file:j48.NBTreeNoSplit.java

License:Open Source License

/**
 * Utility method for fast 5-fold cross validation of a naive bayes
 * model
 *
 * @param fullModel a <code>NaiveBayesUpdateable</code> value
 * @param trainingSet an <code>Instances</code> value
 * @param r a <code>Random</code> value
 * @return a <code>double</code> value
 * @exception Exception if an error occurs
 */
public static double crossValidate(NaiveBayesUpdateable fullModel, Instances trainingSet, Random r)
        throws Exception {
    // make some copies for fast evaluation of 5-fold xval
    Classifier[] copies = Classifier.makeCopies(fullModel, 5);
    Evaluation eval = new Evaluation(trainingSet);
    // make some splits
    for (int j = 0; j < 5; j++) {
        Instances test = trainingSet.testCV(5, j);
        // unlearn these test instances
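        // (NaiveBayesUpdateable subtracts an instance's contribution when it is updated with a negative weight)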
        for (int k = 0; k < test.numInstances(); k++) {
            test.instance(k).setWeight(-test.instance(k).weight());
            ((NaiveBayesUpdateable) copies[j]).updateClassifier(test.instance(k));
            // reset the weight back to its original value
            test.instance(k).setWeight(-test.instance(k).weight());
        }
        eval.evaluateModel(copies[j], test);
    }
    return eval.incorrect();
}

From source file:j48.PruneableClassifierTree.java

License:Open Source License

/**
 * Method for building a pruneable classifier tree.
 *
 * @param data the data to build the tree from 
 * @throws Exception if tree can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier tree handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_seed);
    data.stratify(numSets);
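    // grow the tree on the first numSets-1 folds; the last fold is held out as the pruning set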
    buildTree(data.trainCV(numSets, numSets - 1, random), data.testCV(numSets, numSets - 1), !m_cleanup);
    if (pruneTheTree) {
        prune();
    }
    if (m_cleanup) {
        cleanup(new Instances(data, 0));
    }
}

From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java

License:Open Source License

@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {

    // can't handle empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset.
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        Instances train = dummy.trainCV(nFolds, k);
        Instances test = dummy.testCV(nFolds, k);

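        // strip the test fold's predictions from each weak learner so the ensemble is built on training data only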
        List<WeakLearner> cvLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLeaners.add(copy);
        }

        // train on fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLeaners);

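        // restore the test fold's predictions from the original learners so the trained ensemble can be scored on the held-out essays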
        List<WeakLearner> testLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLeaners.add(copy);
        }

        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLeaners, cv.getContext()));
    }

    // now prepare final result

    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);

    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    //   Job.log(essaySet+"-"+ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);      

    strong.setKappa(cvError);
    return strong;
}

From source file:liac.igmn.evaluation.Evaluator.java

License:Open Source License

public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());

    Instances instances = dataset.getWekaDataset();
    int seed = 1;
    for (int run = 0; run < runs; run++) {
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }

        if (verbose)
            System.out.println("RUN: " + (run + 1));

        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);

            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);

            model.reset();

            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));

            model.train(trainData);

            if (verbose)
                System.out.println("TESTING...");

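            // rows 0..inputSize-1 of each column hold the inputs; the remaining rows hold the class targets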
            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(),
                    dataset.getNumAttributes(), 0, SimpleMatrix.END);
            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);

                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);

                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    confusionMatrix.set(confusionMatrix.divide(runs));
}