List of usage examples for weka.core Instances trainCV
public Instances trainCV(int numFolds, int numFold)
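trainCV creates the training set for one fold of a cross-validation on the dataset: the result contains every instance except those in fold numFold, while its counterpart testCV(int numFolds, int numFold) returns the held-out fold, so the two calls together partition the data once per fold. Before the per-file examples, a minimal sketch of the canonical loop (hedged: data and cls are assumed to be an already-loaded, class-indexed dataset and a configured classifier; Classifier.makeCopy follows the older Weka API used in the examples below):

import java.util.Random;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Instances;

int folds = 10;
Instances randData = new Instances(data); // copy, so the caller's instance order is untouched
randData.randomize(new Random(42));       // shuffle before folding
if (randData.classAttribute().isNominal())
    randData.stratify(folds);             // keep class proportions similar across folds
Evaluation eval = new Evaluation(randData);
for (int n = 0; n < folds; n++) {
    Instances train = randData.trainCV(folds, n); // everything except fold n
    Instances test = randData.testCV(folds, n);   // fold n only
    Classifier copy = Classifier.makeCopy(cls);   // fresh copy so folds don't share state
    copy.buildClassifier(train);
    eval.evaluateModel(copy, test);
}
System.out.println(eval.toSummaryString());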
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {
    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);
    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);
        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }
        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }
        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        // AddClassification filter = new AddClassification();
        // filter.setClassifier(pClassifier);
        // filter.setOutputClassification(true);
        // filter.setOutputDistribution(true);
        // filter.setOutputErrorFlag(true);
        // filter.setInputFormat(train);
        // Filter.useFilter(train, filter);
        // Instances pred = Filter.useFilter(test, filter);
        // if (predictedData == null)
        //     predictedData = new Instances(pred, 0);
        //
        // for (int j = 0; j < pred.numInstances(); j++)
        //     predictedData.add(pred.instance(j));
    }

    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);
    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));
    pw1.close(); // close the writer so the results are actually flushed to disk

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}
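The commented-out line inside the loop above points at the three-argument overload, trainCV(int numFolds, int numFold, Random random). The difference is small but worth a sketch (variables as in the example above): the two-argument form keeps the training fold in dataset order, which is what the StratifiedRemoveFolds filter uses, while the three-argument form additionally shuffles the returned training set, matching the Explorer/Experimenter.

// Same partition, different ordering of the returned training set:
Instances trainInOrder  = randData.trainCV(folds, n);       // StratifiedRemoveFolds-style
Instances trainShuffled = randData.trainCV(folds, n, rand); // Explorer/Experimenter-style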
From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java
License:Open Source License
@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {
    // can't handle empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset.
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        Instances train = dummy.trainCV(nFolds, k);
        Instances test = dummy.testCV(nFolds, k);

        List<WeakLearner> cvLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLeaners.add(copy);
        }

        // train on fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLeaners);

        List<WeakLearner> testLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLeaners.add(copy);
        }
        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLeaners, cv.getContext()));
    }

    // now prepare final result
    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);
    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    // Job.log(essaySet+"-"+ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);
    strong.setKappa(cvError);
    return strong;
}
From source file:liac.igmn.evaluation.Evaluator.java
License:Open Source License
public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());
    Instances instances = dataset.getWekaDataset();
    int seed = 1;
    for (int run = 0; run < runs; run++) {
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }
        if (verbose)
            System.out.println("RUN: " + (run + 1));
        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);
            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);

            model.reset();
            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));
            model.train(trainData);

            if (verbose)
                System.out.println("TESTING...");
            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(),
                    dataset.getNumAttributes(), 0, SimpleMatrix.END);
            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);
                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);
                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    confusionMatrix.set(confusionMatrix.divide(runs));
}
From source file:machinelearningcw.EnhancedLinearPerceptron.java
public boolean crossValidation(Instances ins) throws Exception {
    // get the data
    Instances data = new Instances(ins);
    Instances train; // the new training data
    Instances test;  // the new testing data
    int seed = 0;
    Random rand = new Random(seed);

    // randomize the data
    data.randomize(rand);

    // number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;
    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);

        // add the total errors for each
        // offlineErrors += offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }
        }

        // reset w
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }
    }
    // System.out.println(" off: " + offlineErrors);
    // System.out.println(" on: " + onlineErrors);

    // calculate the mean of the total errors (note: integer division truncates)
    offlineErrors = offlineErrors / folds;
    onlineErrors = onlineErrors / folds;
    // System.out.println(flag);
    return offlineErrors > onlineErrors;
}
From source file:mao.datamining.ModelProcess.java
private void testCV(Classifier classifier, Instances finalTrainDataSet, FileOutputStream testCaseSummaryOut,
        TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;

    // randomize data, and then stratify it into 10 groups
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // always run with 10-fold cross validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);

            // counting training time
            start = System.currentTimeMillis();
            Classifier j48ClassifierCopy = Classifier.makeCopy(classifier);
            j48ClassifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            // counting test time
            start = System.currentTimeMillis();
            eval.evaluateModel(j48ClassifierCopy, test);
            evalAll.evaluateModel(j48ClassifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } // end test by cross validation

    // output evaluation
    try {
        ModelProcess.logging("");
        // write into summary file
        testCaseSummaryOut.write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write(
                (evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n")).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 * @param h a multi-output classifier
 * @param D test data Instances
 * @param numFolds number of folds of CV
 * @param top Threshold OPtion (pertains to multi-label data only)
 * @param vop Verbosity OPtion (which measures do we want to calculate/output)
 * @return Result raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D)
                    + "\tL=" + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":"
                    + D_test.numInstances() + ")\tLC(t:T)="
                    + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2) + ":"
                    + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println("[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop);
    // Need to reset these because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}
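A hedged sketch of calling cvModel directly (the dataset file name, classifier choice, and option values here are illustrative assumptions, not taken from the source above):

import meka.classifiers.multilabel.BR;
import meka.classifiers.multilabel.Evaluation;
import meka.core.MLUtils;
import meka.core.Result;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

Instances D = DataSource.read("Music.arff"); // hypothetical multi-label ARFF
MLUtils.prepareData(D);                      // set the class index from the -C option in the relation name
Result r = Evaluation.cvModel(new BR(), D, 10, "PCut1", "3"); // 10 folds, PCut1 threshold, verbosity 3
System.out.println(r);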
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;
    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;
    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons; Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(),        // compression from k-th layer to (k+1)-th layer
                data,                         // training data
                true,                         // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;

            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());
            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");
            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }

    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    if (this.getAE() == null) {
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons; Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(),        // compression from k-th layer to (k+1)-th layer
                data,                         // training data
                true,                         // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);
    result.setClassIndex(compressedLabels.numAttributes());
    return result;
}
From source file:meka.experiment.evaluators.CrossValidation.java
License:Open Source License
/**
 * Returns the evaluation statistics generated for the dataset (sequential execution).
 *
 * @param classifier the classifier to evaluate
 * @param dataset the dataset to evaluate on
 * @return the statistics
 */
protected List<EvaluationStatistics> evaluateSequential(MultiLabelClassifier classifier, Instances dataset) {
    List<EvaluationStatistics> result;
    EvaluationStatistics stats;
    Instances train;
    Instances test;
    Result res;
    int i;
    Random rand;
    MultiLabelClassifier current;

    result = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        log("Fold: " + i);
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, i - 1);
        else
            train = dataset.trainCV(m_NumFolds, i - 1, rand);
        test = dataset.testCV(m_NumFolds, i - 1);
        try {
            current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
            res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
            stats = new EvaluationStatistics(classifier, dataset, res);
            stats.put(KEY_FOLD, i);
            result.add(stats);
        } catch (Exception e) {
            handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                    + Utils.toCommandLine(classifier), e);
            break;
        }
        if (m_Stopped)
            break;
    }

    if (m_Stopped)
        result.clear();

    return result;
}
From source file:meka.experiment.evaluators.CrossValidation.java
License:Open Source License
/**
 * Returns the evaluation statistics generated for the dataset (parallel execution).
 *
 * @param classifier the classifier to evaluate
 * @param dataset the dataset to evaluate on
 * @return the statistics
 */
protected List<EvaluationStatistics> evaluateParallel(final MultiLabelClassifier classifier,
        final Instances dataset) {
    List<EvaluationStatistics> result;
    ArrayList<EvaluatorJob> jobs;
    EvaluatorJob job;
    int i;
    Random rand;

    result = new ArrayList<>();

    debug("pre: create jobs");
    jobs = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        final int index = i;
        final Instances train;
        final Instances test;
        final MultiLabelClassifier current;
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, index - 1);
        else
            train = dataset.trainCV(m_NumFolds, index - 1, rand);
        test = dataset.testCV(m_NumFolds, index - 1);
        current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
        job = new EvaluatorJob() {
            protected List<EvaluationStatistics> doCall() throws Exception {
                List<EvaluationStatistics> result = new ArrayList<>();
                log("Executing fold #" + index + "...");
                try {
                    Result res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
                    EvaluationStatistics stats = new EvaluationStatistics(classifier, dataset, res);
                    stats.put(KEY_FOLD, index);
                    result.add(stats);
                } catch (Exception e) {
                    handleException("Failed to evaluate dataset '" + dataset.relationName()
                            + "' with classifier: " + Utils.toCommandLine(classifier), e);
                }
                log("...finished fold #" + index);
                return result;
            }
        };
        jobs.add(job);
    }
    debug("post: create jobs");

    // execute jobs
    m_Executor = Executors.newFixedThreadPool(m_ActualNumThreads);
    debug("pre: submit");
    try {
        for (i = 0; i < jobs.size(); i++)
            m_Executor.submit(jobs.get(i));
    } catch (RejectedExecutionException e) {
        // ignored
    } catch (Exception e) {
        handleException("Failed to start up jobs", e);
    }
    debug("post: submit");

    debug("pre: shutdown");
    m_Executor.shutdown();
    debug("post: shutdown");

    // wait for threads to finish
    debug("pre: wait");
    while (!m_Executor.isTerminated()) {
        try {
            m_Executor.awaitTermination(100, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // ignored
        } catch (Exception e) {
            handleException("Failed to await termination", e);
        }
    }
    debug("post: wait");

    // collect results
    debug("pre: collect");
    for (i = 0; i < jobs.size(); i++)
        result.addAll(jobs.get(i).getResult());
    debug("post: collect");

    return result;
}
From source file:mlpoc.MLPOC.java
public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        // loads data and set class index
        Instances data = new Instances(br);
        br.close();
        /*
        File csv = new File(filename);
        CSVLoader loader = new CSVLoader();
        loader.setSource(csv);
        Instances data = loader.getDataSet();
        */
        data.setClassIndex(data.numAttributes() - 1);

        // classifier
        String[] tmpOptions;
        String classname = "weka.classifiers.trees.J48 -C 0.25";
        tmpOptions = classname.split(" ");
        classname = "weka.classifiers.trees.J48";
        tmpOptions[0] = "";
        Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

        // other options
        int seed = 2;
        int folds = 10;

        // randomize data
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // the above code is used by the StratifiedRemoveFolds filter, the
            // code below by the Explorer/Experimenter:
            // Instances train = randData.trainCV(folds, n, rand);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out.println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("Incorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of Incorrectly Classified Instances: " + eval.pctIncorrect());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
    }
    return eval;
}