List of usage examples for weka.core.Instance.classValue()
public double classValue();
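Before the examples, a minimal sketch of how classValue() is typically used: it returns the class value of an instance as a double. For a nominal class attribute the value is the index of the class label, which is why the examples below routinely cast it to int before comparing it or using it as an array index; for a numeric class it is the target value itself. The file name "iris.arff" and the last-attribute class index are assumptions for illustration:

// Minimal usage sketch (assumed dataset "iris.arff"; class attribute assumed to be last).
import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (inst.classIsMissing()) {
                continue; // classValue() is undefined for a missing class
            }
            double value = inst.classValue();
            // For a nominal class, the double is a label index
            String label = data.classAttribute().isNominal()
                    ? data.classAttribute().value((int) value)
                    : String.valueOf(value);
            System.out.println(i + ": " + value + " (" + label + ")");
        }
    }
}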
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = classifier.classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Updates all the statistics about a classifier's performance for
 * the current test instance.
 *
 * @param predictedDistribution the probabilities assigned to each class
 * @param instance the instance to be classified
 * @exception Exception if the class of the instance is not set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {

    int actualClass = (int) instance.classValue();
    double costFactor = 1;

    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());

        // Determine the predicted class (doesn't detect multiple classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }

        m_WithClass += instance.weight();

        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass, actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass);
            } else {
                m_TotalCost += instance.weight() * m_CostMatrix.getElement(actualClass, predictedClass);
            }
        }

        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }

        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());

        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        m_MissingClass += instance.weight();
    }
}
From source file: gnusmail.learning.ClassifierManager.java
License: Open Source License

private boolean goodPrediction(double[] prediction, Instance trainInst) {
    return Utils.maxIndex(prediction) == (int) trainInst.classValue();
}
From source file: gnusmail.learning.ClassifierManager.java
License: Open Source License

/**
 * This method reads the messages in chronological order and updates the
 * underlying model with each message.
 *
 * @return the per-message outcomes (1.0 for a correct prediction, 0.0 otherwise)
 */
public List<Double> incrementallyTrainModel(DocumentReader reader, String wekaClassifier, FilterManager fm) {
    List<Double> successes = new ArrayList<Double>();
    try {
        Classifier model = (Classifier) Class.forName(wekaClassifier).newInstance();
        try {
            model.buildClassifier(filterManager.getDataset());
        } catch (Exception ex) {
            Logger.getLogger(ClassifierManager.class.getName()).log(Level.SEVERE, null, ex);
        }
        UpdateableClassifier updateableModel = (UpdateableClassifier) model;
        for (Document doc : reader) {
            double predictedClass = 0.0;
            try {
                Instance inst = doc.toWekaInstance(fm);
                predictedClass = model.classifyInstance(inst);
                double trueClass = inst.classValue();
                successes.add((predictedClass == trueClass) ? 1.0 : 0.0);
                updateableModel.updateClassifier(inst);
            } catch (Exception ex) {
                Logger.getLogger(ClassifierManager.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        FileOutputStream f = new FileOutputStream(ConfigManager.MODEL_FILE);
        ObjectOutputStream fis = new ObjectOutputStream(f);
        fis.writeObject(updateableModel);
        fis.close();
    } catch (Exception ex) {
        Logger.getLogger(ClassifierManager.class.getName()).log(Level.SEVERE, null, ex);
    }
    return successes;
}
From source file: gyc.OverBoostM1.java
License: Open Source License

/**
 * Sets the weights for the next iteration.
 *
 * @param training the training instances
 * @param reweight the reweighting factor
 * @throws Exception if something goes wrong
 */
protected void setWeights(Instances training, double reweight) throws Exception {

    double oldSumOfWeights, newSumOfWeights;

    oldSumOfWeights = training.sumOfWeights();
    Enumeration enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance),
                instance.classValue())) {
            instance.setWeight(instance.weight() * reweight);
        }
    }

    // Renormalize weights
    newSumOfWeights = training.sumOfWeights();
    enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights);
    }
}
From source file: hsa_jni.hsa_jni.EvaluatePeriodicHeldOutTestBatch.java
License: Open Source License

@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    learner.setModelContext(stream.getHeader());
    long instancesProcessed = 0;
    LearningCurve learningCurve = new LearningCurve("evaluation instances");
    File dumpFile = this.dumpFileOption.getFile();
    PrintStream immediateResultStream = null;
    if (dumpFile != null) {
        try {
            if (dumpFile.exists()) {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true);
            } else {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex);
        }
    }
    boolean firstDump = true;
    InstanceStream testStream = null;
    int testSize = this.testSizeOption.getValue();
    if (this.cacheTestOption.isSet()) {
        monitor.setCurrentActivity("Caching test examples...", -1.0);
        Instances testInstances = new Instances(stream.getHeader(), this.testSizeOption.getValue());
        while (testInstances.numInstances() < testSize) {
            testInstances.add(stream.nextInstance());
            if (testInstances.numInstances() % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
                if (monitor.taskShouldAbort()) {
                    return null;
                }
                monitor.setCurrentActivityFractionComplete(
                        (double) testInstances.numInstances() / (double) (this.testSizeOption.getValue()));
            }
        }
        testStream = new CachedInstancesStream(testInstances);
    } else {
        // testStream = (InstanceStream) stream.copy();
        testStream = stream;
        /*
        monitor.setCurrentActivity("Skipping test examples...", -1.0);
        for (int i = 0; i < testSize; i++) {
            stream.nextInstance();
        }
        */
    }
    instancesProcessed = 0;
    TimingUtils.enablePreciseTiming();
    double totalTrainTime = 0.0;
    while ((this.trainSizeOption.getValue() < 1 || instancesProcessed < this.trainSizeOption.getValue())
            && stream.hasMoreInstances() == true) {
        monitor.setCurrentActivityDescription("Training...");
        long instancesTarget = instancesProcessed + this.sampleFrequencyOption.getValue();
        ArrayList<Instance> instanceCache = new ArrayList<Instance>();
        long trainStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
        double lastTrainTime = 0;
        while (instancesProcessed < instancesTarget && stream.hasMoreInstances() == true) {
            instanceCache.add(stream.nextInstance());
            instancesProcessed++;
            if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
                if (monitor.taskShouldAbort()) {
                    return null;
                }
                monitor.setCurrentActivityFractionComplete(
                        (double) (instancesProcessed) / (double) (this.trainSizeOption.getValue()));
            }
            // Train in batches of 1000 cached instances
            if (instanceCache.size() % 1000 == 0) {
                trainStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
                for (Instance inst : instanceCache) {
                    learner.trainOnInstance(inst);
                }
                lastTrainTime += TimingUtils
                        .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - trainStartTime);
                instanceCache.clear();
            }
        }
        trainStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
        for (Instance inst : instanceCache) {
            learner.trainOnInstance(inst);
        }
        if (learner instanceof BatchClassifier) {
            ((BatchClassifier) learner).commit();
        }
        lastTrainTime += TimingUtils
                .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - trainStartTime);
        totalTrainTime += lastTrainTime;
        if (totalTrainTime > this.trainTimeOption.getValue()) {
            break;
        }
        if (this.cacheTestOption.isSet()) {
            testStream.restart();
        }
        evaluator.reset();
        long testInstancesProcessed = 0;
        monitor.setCurrentActivityDescription("Testing (after "
                + StringUtils.doubleToString(
                        ((double) (instancesProcessed) / (double) (this.trainSizeOption.getValue()) * 100.0), 2)
                + "% training)...");
        long testStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
        int instCount = 0;
        for (instCount = 0; instCount < testSize; instCount++) {
            if (stream.hasMoreInstances() == false) {
                break;
            }
            // Hide the true class from the learner, predict, then restore it
            Instance testInst = (Instance) testStream.nextInstance().copy();
            double trueClass = testInst.classValue();
            testInst.setClassMissing();
            double[] prediction = learner.getVotesForInstance(testInst);
            testInst.setClassValue(trueClass);
            evaluator.addResult(testInst, prediction);
            testInstancesProcessed++;
            if (testInstancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
                if (monitor.taskShouldAbort()) {
                    return null;
                }
                monitor.setCurrentActivityFractionComplete(
                        (double) testInstancesProcessed / (double) (testSize));
            }
        }
        if (instCount != testSize) {
            break;
        }
        double testTime = TimingUtils
                .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - testStartTime);
        List<Measurement> measurements = new ArrayList<Measurement>();
        measurements.add(new Measurement("evaluation instances", instancesProcessed));
        measurements.add(new Measurement("total train time", totalTrainTime));
        measurements.add(new Measurement("total train speed", instancesProcessed / totalTrainTime));
        measurements.add(new Measurement("last train time", lastTrainTime));
        measurements.add(
                new Measurement("last train speed", this.sampleFrequencyOption.getValue() / lastTrainTime));
        measurements.add(new Measurement("test time", testTime));
        measurements.add(new Measurement("test speed", this.testSizeOption.getValue() / testTime));
        Measurement[] performanceMeasurements = evaluator.getPerformanceMeasurements();
        for (Measurement measurement : performanceMeasurements) {
            measurements.add(measurement);
        }
        Measurement[] modelMeasurements = learner.getModelMeasurements();
        for (Measurement measurement : modelMeasurements) {
            measurements.add(measurement);
        }
        learningCurve.insertEntry(
                new LearningEvaluation(measurements.toArray(new Measurement[measurements.size()])));
        if (immediateResultStream != null) {
            if (firstDump) {
                immediateResultStream.println(learningCurve.headerToString());
                firstDump = false;
            }
            immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
            immediateResultStream.flush();
        }
        if (monitor.resultPreviewRequested()) {
            monitor.setLatestResultPreview(learningCurve.copy());
        }
        // (The original source kept a large commented-out block here that stopped the
        // run early once a Hoeffding tree or ensemble had frozen: it read the "active
        // learning leaves" and "tree size (nodes)" measurements and broke out of the
        // loop when no nodes had grown between evaluations.)
    }
    if (immediateResultStream != null) {
        immediateResultStream.close();
    }
    return learningCurve;
}
From source file: imba.classifier.FFNNTubes.java

private void setTarget(Instances data) {
    target = new double[nData][nOutput];
    for (int i = 0; i < nData; i++) {
        Instance current = data.get(i);
        for (int j = 0; j < nOutput; j++) {
            // One-hot encode the class value: 1.0 at the class index, 0.0 elsewhere
            if (j == current.classValue()) {
                target[i][j] = 1.0;
            } else {
                target[i][j] = 0.0;
            }
        }
    }
}
From source file: iris.ID3.java

public void makeLikeAWhat(Instances instances) {
    // Create storage for different info gains
    double[] infoGains = new double[instances.numAttributes()];

    // Enumerate through attributes, adding each gain to the infoGains array
    Enumeration attributeEnum = instances.enumerateAttributes();
    while (attributeEnum.hasMoreElements()) {
        Attribute att = (Attribute) attributeEnum.nextElement();
        infoGains[att.index()] = infoGain(instances, att);
    }

    // Use maxIndex to find the highest info gain in the array
    highestInfoGain = instances.attribute(Utils.maxIndex(infoGains));

    // Make a leaf if there is no more info to gain; otherwise, create children
    if (Utils.eq(infoGains[highestInfoGain.index()], 0)) {
        highestInfoGain = null;

        // Instantiate maxDistribution
        maxDistribution = new double[instances.numClasses()];

        // Tally classes over the instances
        Enumeration instanceEnum = instances.enumerateInstances();
        while (instanceEnum.hasMoreElements()) {
            Instance instance = (Instance) instanceEnum.nextElement();
            maxDistribution[(int) instance.classValue()]++;
        }

        // Normalize data for easier manipulation
        Utils.normalize(maxDistribution);

        // Get the max index of the distribution
        classValue = Utils.maxIndex(maxDistribution);

        // Save class attribute
        classAttribute = instances.classAttribute();
    } else {
        // Split best attribute into bins
        Instances[] bins = makeBins(instances, highestInfoGain);

        // Create child nodes
        children = new ID3[highestInfoGain.numValues()];
        for (int i = 0; i < highestInfoGain.numValues(); i++) {
            children[i] = new ID3();
            children[i].makeLikeAWhat(bins[i]);
        }
    }
}
From source file: iris.Network.java

public void backPropogate(Instance instance) throws Exception {
    // Walk backwards through the layers, starting at the output layer
    for (int i = layers.size() - 1; i >= 0; i--) {
        if (i == layers.size() - 1) {
            // Output layer: loop through its neurons
            for (int j = 0; j < neuronsInEachLayer.get(i); j++) {
                double activationValue = layers.get(i).getNeuron(j).outputValue;
                double targetValue = 0;
                double leftValue = 0;

                // Target is 1 for the neuron matching the instance's class, 0 otherwise
                if (instance.classValue() == j) {
                    targetValue = 1;
                }

                // Error term for this output node
                layers.get(i).getNeuron(j).error = activationValue * (1 - activationValue)
                        * (activationValue - targetValue);
                double error = layers.get(i).getNeuron(j).error;

                // Update each incoming weight
                for (int k = 0; k < layers.get(i).getNeuron(j).weights.size(); k++) {
                    // Output of the k-th node in the layer to the left,
                    // corresponding to the weight we're changing
                    leftValue = layers.get(i - 1).getNeuron(k).outputValue;
                    layers.get(i).getNeuron(j).setNewWeight(k,
                            layers.get(i).getNeuron(j).getWeight(k) - (learningRate * error * leftValue));
                }
            }
        } else {
            // Hidden layer: loop through its neurons
            for (int j = 0; j < neuronsInEachLayer.get(i); j++) {
                double activationValue = layers.get(i).getNeuron(j).outputValue;
                double leftValue = 0;
                double sumWeightsWithError = 0;

                // Sum of (outgoing weight * error) over the neurons in the layer to the right
                for (int a = 0; a < layers.get(i + 1).neurons.size(); a++) {
                    sumWeightsWithError += layers.get(i + 1).neurons.get(a).getWeight(j)
                            * layers.get(i + 1).neurons.get(a).error;
                }

                // Error term for this hidden node
                layers.get(i).getNeuron(j).error = activationValue * (1 - activationValue)
                        * sumWeightsWithError;
                double error = layers.get(i).getNeuron(j).error;

                // Update each incoming weight
                for (int k = 0; k < layers.get(i).getNeuron(j).weights.size(); k++) {
                    leftValue = layers.get(i).getNeuron(j).outputValue;
                    layers.get(i).getNeuron(j).setNewWeight(k,
                            layers.get(i).getNeuron(j).getWeight(k) - (learningRate * error * leftValue));
                }
            }
        }
    }

    // All errors computed; commit the new weights across the whole network
    for (int i = 0; i < layers.size(); i++) {
        for (int j = 0; j < neuronsInEachLayer.get(i); j++) {
            layers.get(i).getNeuron(j).updateWeights();
        }
    }
}
From source file: j48.Distribution.java
License: Open Source License

/**
 * Adds given instance to given bag.
 *
 * @exception Exception if something goes wrong
 */
public final void add(int bagIndex, Instance instance) throws Exception {

    int classIndex;
    double weight;

    classIndex = (int) instance.classValue();
    weight = instance.weight();
    m_perClassPerBag[bagIndex][classIndex] = m_perClassPerBag[bagIndex][classIndex] + weight;
    m_perBag[bagIndex] = m_perBag[bagIndex] + weight;
    m_perClass[classIndex] = m_perClass[classIndex] + weight;
    totaL = totaL + weight;
}