List of usage examples for weka.core.Utils.probRound
public static int probRound(double value, Random rand)
From source file: milk.experiment.MIRandomSplitResultProducer.java
License:Open Source License
/** * Gets the results for a specified run number. Different run * numbers correspond to different randomizations of the data. Results * produced should be sent to the current ResultListener *// w ww . jav a 2s . c o m * @param run the run number to get results for. * @exception Exception if a problem occurs while getting the results */ public void doRun(int run) throws Exception { if (getRawOutput()) { if (m_ZipDest == null) { m_ZipDest = new OutputZipper(m_OutputFile); } } if (m_Instances == null) { throw new Exception("No Instances set"); } // Add in some fields to the key like run number, dataset name Object[] seKey = m_SplitEvaluator.getKey(); Object[] key = new Object[seKey.length + 2]; key[0] = Utils.backQuoteChars(m_Instances.relationName()); key[1] = "" + run; System.arraycopy(seKey, 0, key, 2, seKey.length); if (m_ResultListener.isResultRequired(this, key)) { // Randomize on a copy of the original dataset Exemplars runInstances = new Exemplars(m_Instances); Exemplars train; Exemplars test; if (!m_randomize) { // Don't do any randomization int trainSize = Utils.round(runInstances.numExemplars() * m_TrainPercent / 100); int testSize = runInstances.numExemplars() - trainSize; train = new Exemplars(runInstances, 0, trainSize); test = new Exemplars(runInstances, trainSize, testSize); } else { Random rand = new Random(run); runInstances.randomize(rand); // Nominal class if (runInstances.classAttribute().isNominal()) { // create the subset for each classs int numClasses = runInstances.numClasses(); Exemplars[] subsets = new Exemplars[numClasses + 1]; for (int i = 0; i < numClasses + 1; i++) { subsets[i] = new Exemplars(runInstances, 10); } // divide instances into subsets for (int i = 0; i < runInstances.numExemplars(); i++) { Exemplar inst = runInstances.exemplar(i); if (inst.getInstances().instance(0).classIsMissing()) { subsets[numClasses].add(inst); } else { subsets[(int) inst.classValue()].add(inst); } } // Compactify them for (int i = 0; i < numClasses + 
1; i++) { subsets[i].compactify(); } // merge into train and test sets train = new Exemplars(runInstances, runInstances.numExemplars()); test = new Exemplars(runInstances, runInstances.numExemplars()); for (int i = 0; i < numClasses + 1; i++) { int trainSize = Utils.probRound(subsets[i].numExemplars() * m_TrainPercent / 100, rand); for (int j = 0; j < trainSize; j++) { train.add(subsets[i].exemplar(j)); } for (int j = trainSize; j < subsets[i].numExemplars(); j++) { test.add(subsets[i].exemplar(j)); } // free memory subsets[i] = null; } train.compactify(); test.compactify(); // randomize the final sets train.randomize(rand); test.randomize(rand); } else { // Numeric target int trainSize = Utils.probRound(runInstances.numExemplars() * m_TrainPercent / 100, rand); int testSize = runInstances.numExemplars() - trainSize; train = new Exemplars(runInstances, 0, trainSize); test = new Exemplars(runInstances, trainSize, testSize); } } try { Object[] seResults = m_SplitEvaluator.getResult(train, test); Object[] results = new Object[seResults.length + 1]; results[0] = getTimestamp(); System.arraycopy(seResults, 0, results, 1, seResults.length); if (m_debugOutput) { String resultName = ("" + run + "." + Utils.backQuoteChars(runInstances.relationName()) + "." + m_SplitEvaluator.toString()).replace(' ', '_'); resultName = Utils.removeSubstring(resultName, "weka.classifiers."); resultName = Utils.removeSubstring(resultName, "weka.filters."); resultName = Utils.removeSubstring(resultName, "weka.attributeSelection."); m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName); } m_ResultListener.acceptResult(this, key, results); } catch (Exception ex) { // Save the train and test datasets for debugging purposes? throw ex; } } }