Example usage for weka.core Instances trainCV

List of usage examples for weka.core Instances trainCV

Introduction

On this page you can find example usage for weka.core Instances trainCV.

Prototype



public Instances trainCV(int numFolds, int numFold) 

Document

Creates the training set for one fold of a cross-validation on the dataset.
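
Before the source-file examples, here is a minimal, self-contained sketch (not taken from any of the sources below) of how trainCV is typically paired with testCV: the data is randomized, stratified when the class attribute is nominal, and each fold's training split is used to build a classifier that is then evaluated on the complementary test split. The dataset path "iris.arff", the class name TrainCVSketch, and the choice of J48 are placeholders for illustration; the three-argument overload trainCV(numFolds, numFold, random) additionally shuffles the returned training split.

import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainCVSketch {

    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" is a placeholder path.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        int folds = 10;
        Random rand = new Random(42);

        // Randomize once and, for a nominal class, stratify so every fold
        // keeps roughly the same class distribution.
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal()) {
            randData.stratify(folds);
        }

        Evaluation eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            // trainCV and testCV return complementary splits for fold n.
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);

            Classifier cls = new J48();
            cls.buildClassifier(train);
            eval.evaluateModel(cls, test);
        }

        System.out.println(eval.toSummaryString("=== " + folds + "-fold cross-validation ===", false));
    }
}

Several of the usages below follow the same randomize / stratify / trainCV / testCV sequence.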

Usage

From source file: mulan.evaluation.Evaluator.java

License: Open Source License

private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner, MultiLabelInstances data,
        boolean hasMeasures, List<Measure> measures, int someFolds) {
    Evaluation[] evaluation = new Evaluation[someFolds];

    Instances workingSet = new Instances(data.getDataSet());
    workingSet.randomize(new Random(seed));
    for (int i = 0; i < someFolds; i++) {
        System.out.println("Fold " + (i + 1) + "/" + someFolds);
        try {
            Instances train = workingSet.trainCV(someFolds, i);
            Instances test = workingSet.testCV(someFolds, i);
            MultiLabelInstances mlTrain = new MultiLabelInstances(train, data.getLabelsMetaData());
            MultiLabelInstances mlTest = new MultiLabelInstances(test, data.getLabelsMetaData());
            MultiLabelLearner clone = learner.makeCopy();
            clone.build(mlTrain);
            if (hasMeasures)
                evaluation[i] = evaluate(clone, mlTest, measures);
            else
                evaluation[i] = evaluate(clone, mlTest);
        } catch (Exception ex) {
            Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return new MultipleEvaluation(evaluation);
}

From source file: net.sf.bddbddb.order.WekaInterface.java

License: LGPL

public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; //more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; //no instances

    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");

        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");

        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];

    return average / numFolds;
}

From source file: semana07.IrisKnn.java

public static void main(String[] args) throws FileNotFoundException, IOException, Exception {

    // DEFINING THE TRAINING SET

    // - Defining the reader for the arff file

    FileReader baseIris = new FileReader("iris.arff");
    // - Defining the set of instances from the file "iris.arff"

    Instances iris = new Instances(baseIris);

    // - Defining the index of the class attribute

    iris.setClassIndex(4);

    iris = iris.resample(new Debug.Random());

    Instances irisTreino = iris.trainCV(3, 0);
    Instances irisTeste = iris.testCV(3, 0);

    // DEFINING AN UNKNOWN EXAMPLE

    //5.9,3.0,5.1,1.8,Iris-virginica
    Instance irisInst = new DenseInstance(iris.numAttributes());
    irisInst.setDataset(iris);
    irisInst.setValue(0, 5.9);
    irisInst.setValue(1, 3.0);
    irisInst.setValue(2, 5.1);
    irisInst.setValue(3, 1.8);

    // DEFINING THE CLASSIFICATION ALGORITHM

    //NN

    IBk vizinhoIris = new IBk();

    //kNN

    IBk knnIris = new IBk(3);

    // BUILDING THE CLASSIFIER
    //NN

    vizinhoIris.buildClassifier(irisTreino);

    //kNN

    knnIris.buildClassifier(irisTreino);

    // Defining the file to be written
    FileWriter writer = new FileWriter("iris.csv");

    // Writing the file header
    writer.append("Classe Real;Resultado NN;Resultado kNN");
    writer.append(System.lineSeparator());

    // CLI / Console output
    System.out.println("Classe Real;Resultado NN;Resultado kNN"); // Header
    for (int i = 0; i <= irisTeste.numInstances() - 1; i++) {

        Instance testeIris = irisTeste.instance(i);

        // CLI / Console output of the original value
        System.out.print(testeIris.stringValue(4) + ";");

        // Writing the original value to the file
        writer.append(testeIris.stringValue(4) + ";");

        // Setting the class attribute to missing
        testeIris.setClassMissing();

        // CLASSIFYING THE INSTANCE
        // NN

        double respostaVizinho = vizinhoIris.classifyInstance(testeIris);
        testeIris.setValue(4, respostaVizinho);
        String stringVizinho = testeIris.stringValue(4);

        //kNN

        double respostaKnn = knnIris.classifyInstance(testeIris);

        // Assigning the response to the value of the attribute at index 4 (class)

        testeIris.setValue(4, respostaKnn);

        String stringKnn = testeIris.stringValue(4);
        // Adding the result to the iris instance set

        iris.add(irisInst);

        // Writing the results to the iris.csv file

        writer.append(stringVizinho + ";");
        writer.append(stringKnn + ";");
        writer.append(System.lineSeparator());

        // Displaying the result via CLI / Console

        System.out.print(respostaVizinho + ";");
        System.out.print(respostaKnn + ";");
        System.out.println(testeIris.stringValue(4));
    }

    writer.flush();
    writer.close();

}

From source file: sentinets.Prediction.java

License: Open Source License

public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    String output = "";
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    SGD cls = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    Instances insAll;
    try {
        insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10);
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }
            Evaluation eval = new Evaluation(randData);
            eval.evaluateModel(cls, insAll);
            System.out.println("Initial Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            output += "\n====" + "Initial Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            System.out.println("Cross Validated Evaluation");
            output += "\n====" + "Cross Validated Evaluation" + "====\n";
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                for (int i = 0; i < train.numInstances(); i++) {
                    cls.updateClassifier(train.instance(i));
                }

                eval.evaluateModel(cls, test);
                System.out.println("Cross Validated Evaluation fold: " + n);
                output += "\n====" + "Cross Validated Evaluation fold (" + n + ")====\n";
                System.out.println(eval.toSummaryString());
                System.out.println(eval.toClassDetailsString());
                output += "\n" + eval.toSummaryString();
                output += "\n" + eval.toClassDetailsString();
                metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            }
            for (int i = 0; i < insAll.numInstances(); i++) {
                cls.updateClassifier(insAll.instance(i));
            }
            eval.evaluateModel(cls, insAll);
            System.out.println("Final Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            output += "\n====" + "Final Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            fcls.setClassifier(cls);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output += "\n" + "Updated Model saved at: " + modelFilePath;
        } else {
            output += "No new instances for training the model.";
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return output;
}

From source file: src.BigDataClassifier.GenerateFolds.java

public void generateFolds(Instances trainDataset) throws Exception {

    //randomize data
    Random rand = new Random(1);
    //set folds
    int folds = 3;
    //create random dataset
    Instances randData = new Instances(trainDataset);
    randData.randomize(rand);

    Instances[] result = new Instances[folds * 2];
    //cross-validate
    for (int n = 0; n < folds; n++) {
        trainDataset = randData.trainCV(folds, n);
        System.out.println("Train dataset size is = " + trainDataset.size());
        Instances testDataset = randData.testCV(folds, n);
        System.out.println("Test dataset size is = " + testDataset.size());
        result[n] = trainDataset;
        result[n + folds] = testDataset;
        trainDataset2 = trainDataset;
        testDataset2 = testDataset;
    }
    trainDatasetSize = trainDataset2.size();
    testDatasetSize = testDataset2.size();
}

From source file: tubes.ml.pkg1.TubesML1.java

public void akses() throws Exception {
    Discretize filter;
    int fold = 10;
    int fold3 = 3;
    int trainNum, testNum;
    PrintWriter file = new PrintWriter("model.txt");

    /***dataset 1***/
    file.println("***DATASET 1***");
    fileReader tets = new fileReader("./src/data/iris.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    Instances data = tets.getData();
    filter = new Discretize();
    try {
        filter.setInputFormat(data);
    } catch (Exception ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }

    /*ID3*/
    Instances discreteData;
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        try {

            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;
    J48 jKT = new J48();
    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(train);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
            // System.out.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*dataset 2*/
    file.println("***DATASET 2***");
    tets.setFilepath("./src/data/weather.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());

    /*ID3*/
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold3; i++) {
        try {
            Instances train = discreteData.trainCV(fold3, i);
            Instances test = discreteData.testCV(fold3, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                //file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(train);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*dataset 3*/
    file.println("***DATASET 3***");
    tets.setFilepath("./src/data/weather.nominal.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());

    /*ID3*/
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold3; i++) {
        try {
            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(train);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*RESULT*/
    System.out.println(jKT.toString());
    file.println("RESULT");
    file.println(jKT.toString());
    file.close();
}

From source file: tubes1.Main.java

public static Instances[][] crossValidationSplit(Instances data, int numberOfFolds) {
    Instances[][] split = new Instances[2][numberOfFolds];
    for (int i = 0; i < numberOfFolds; i++) {
        split[0][i] = data.trainCV(numberOfFolds, i);
        split[1][i] = data.testCV(numberOfFolds, i);
    }
    return split;
}