List of usage examples for weka.core Instance classValue
public double classValue();
From source file:moa.tasks.EvaluatePrequentialRegression.java
License:Open Source License
/**
 * Runs a prequential (interleaved test-then-train) regression evaluation:
 * every instance drawn from the stream is first used to test the learner,
 * then used to train it. Measurements are periodically appended to a
 * LearningCurve, which is returned as the task result.
 *
 * @param monitor    task monitor used for progress reporting and abort checks
 * @param repository object repository required by the MOA task contract
 * @return the populated learning curve, or {@code null} when the task aborts
 */
@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    RegressionPerformanceEvaluator evaluator = (RegressionPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    LearningCurve learningCurve = new LearningCurve("learning evaluation instances");
    // New for prequential methods: the task-level width/alpha options are
    // deprecated in favour of configuring the evaluator directly. A
    // non-default value triggers a warning and an empty result.
    if (evaluator instanceof WindowClassificationPerformanceEvaluator) {
        //((WindowClassificationPerformanceEvaluator) evaluator).setWindowWidth(widthOption.getValue());
        if (widthOption.getValue() != 1000) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w "
                            + widthOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof EWMAClassificationPerformanceEvaluator) {
        //((EWMAClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println("DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a "
                    + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) {
        //((FadingFactorClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    // End New for prequential methods
    learner.setModelContext(stream.getHeader());
    // Negative limits mean "unlimited" in the loop condition below.
    int maxInstances = this.instanceLimitOption.getValue();
    long instancesProcessed = 0;
    int maxSeconds = this.timeLimitOption.getValue();
    int secondsElapsed = 0;
    monitor.setCurrentActivity("Evaluating learner...", -1.0);
    // Optional dump file for learning-curve entries; append when it exists.
    File dumpFile = this.dumpFileOption.getFile();
    PrintStream immediateResultStream = null;
    if (dumpFile != null) {
        try {
            if (dumpFile.exists()) {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true);
            } else {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex);
        }
    }
    // File for output predictions (one "prediction,trueValue" line each).
    File outputPredictionFile = this.outputPredictionFileOption.getFile();
    PrintStream outputPredictionResultStream = null;
    if (outputPredictionFile != null) {
        try {
            if (outputPredictionFile.exists()) {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile, true),
                        true);
            } else {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile), true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open prediction result file: " + outputPredictionFile, ex);
        }
    }
    boolean firstDump = true;
    boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
    long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
    long lastEvaluateStartTime = evaluateStartTime;
    double RAMHours = 0.0;
    // Main prequential loop: runs until the stream is exhausted or the
    // instance/time limit (when non-negative) is reached.
    while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances))
            && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
        Instance trainInst = stream.nextInstance();
        Instance testInst = (Instance) trainInst.copy();
        double trueClass = trainInst.classValue();
        //testInst.setClassMissing();
        // Test first on a copy of the instance...
        double[] prediction = learner.getVotesForInstance(testInst);
        // prediction[0] is paired with the true value in the prediction dump.
        if (outputPredictionFile != null) {
            outputPredictionResultStream.println(prediction[0] + "," + trueClass);
        }
        //evaluator.addClassificationAttempt(trueClass, prediction, testInst.weight());
        evaluator.addResult(testInst, prediction);
        // ...then train on the original instance.
        learner.trainOnInstance(trainInst);
        instancesProcessed++;
        // Record a learning-curve entry every sampleFrequency instances and
        // once more at the end of the stream.
        if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
                || stream.hasMoreInstances() == false) {
            long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
            double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
            double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
            // RAM-Hours cost: model size in GB times hours elapsed since the
            // previous sample, accumulated over the whole run.
            double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
            RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
            RAMHours += RAMHoursIncrement;
            lastEvaluateStartTime = evaluateTime;
            learningCurve.insertEntry(new LearningEvaluation(
                    new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed),
                            new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)",
                                    time),
                            new Measurement("model cost (RAM-Hours)", RAMHours) },
                    evaluator, learner));
            if (immediateResultStream != null) {
                if (firstDump) {
                    immediateResultStream.println(learningCurve.headerToString());
                    firstDump = false;
                }
                immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
                immediateResultStream.flush();
            }
        }
        // Periodic monitor housekeeping: abort check, progress estimate,
        // result preview, and elapsed-seconds update for the time limit.
        if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
            if (monitor.taskShouldAbort()) {
                return null;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            if (maxInstances > 0) {
                long maxRemaining = maxInstances - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                    : (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances));
            if (monitor.resultPreviewRequested()) {
                monitor.setLatestResultPreview(learningCurve.copy());
            }
            secondsElapsed = (int) TimingUtils
                    .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime);
        }
    }
    if (immediateResultStream != null) {
        immediateResultStream.close();
    }
    if (outputPredictionResultStream != null) {
        outputPredictionResultStream.close();
    }
    return learningCurve;
}
From source file:myclassifier.MyC45.java
/** * Method building ID3 tree.//from w w w.j a v a 2 s. c o m * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = -1; //Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] gainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); gainRatios[att.index()] = computeGainRatio(data, att); } m_Attribute = data.attribute(Utils.maxIndex(gainRatios)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(gainRatios[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyC45[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyC45(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:myclassifier.MyC45.java
private double computeEntropyFromData(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }/*from ww w . j a va 2 s . c o m*/ double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) entropy -= (double) (classCounts[j] / data.numInstances()) * Utils.log2((double) classCounts[j] / data.numInstances()); } //return entropy + Utils.log2(data.numInstances()); return entropy; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/** * Adds instance to subDataset./* ww w .j av a 2 s . com*/ * * @exception Exception if something goes wrong */ public void addInstance(int subDatasetIndex, Instance instance) throws Exception { int classIndex = (int) instance.classValue(); double weight = instance.weight(); w_perClassPerSubdataset[subDatasetIndex][classIndex] = w_perClassPerSubdataset[subDatasetIndex][classIndex] + weight; w_perSubdataset[subDatasetIndex] = w_perSubdataset[subDatasetIndex] + weight; w_perClass[classIndex] = w_perClass[classIndex] + weight; totalWeights = totalWeights + weight; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Distributes every instance with a missing value for the given attribute
 * across all subdatasets, weighting each share by the subdataset's current
 * fraction of the total weight (uniform shares when no weight exists yet).
 *
 * @param dataSet  the instances to scan for missing attribute values
 * @param attIndex index of the attribute whose missing values are handled
 * @exception Exception if something goes wrong
 */
public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception {
    // Snapshot of each subdataset's share of the total weight; computed
    // once, before any missing-value weight is added below.
    double[] shares = new double[w_perSubdataset.length];
    for (int i = 0; i < shares.length; i++) {
        shares[i] = (totalWeights == 0) ? 1.0 / shares.length : w_perSubdataset[i] / totalWeights;
    }
    for (Enumeration e = dataSet.enumerateInstances(); e.hasMoreElements();) {
        Instance inst = (Instance) e.nextElement();
        if (!inst.isMissing(attIndex)) {
            continue;
        }
        int cls = (int) inst.classValue();
        double w = inst.weight();
        w_perClass[cls] += w;
        totalWeights += w;
        // Spread the instance's weight over the subdatasets.
        for (int i = 0; i < shares.length; i++) {
            double slice = shares[i] * w;
            w_perClassPerSubdataset[i][cls] += slice;
            w_perSubdataset[i] += slice;
        }
    }
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/** * Adds all instances in given range to given bag. * * @exception Exception if something goes wrong */// w ww .ja v a 2s. c om public final void addRange(int subDatasetIndex, Instances dataSet, int startIndex, int lastIndex) throws Exception { double sumOfWeights = 0; int classIndex; Instance data; for (int i = startIndex; i < lastIndex; i++) { data = (Instance) dataSet.instance(i); classIndex = (int) data.classValue(); sumOfWeights += data.weight(); w_perClassPerSubdataset[subDatasetIndex][classIndex] += data.weight(); w_perClass[classIndex] += data.weight(); } w_perSubdataset[subDatasetIndex] += sumOfWeights; totalWeights += sumOfWeights; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/** * Adds given instance to all bags weighting it according to given weights. * * @exception Exception if something goes wrong *///w w w . ja v a 2s. co m public void addWeights(Instance instance, double[] weights) throws Exception { int classIndex; classIndex = (int) instance.classValue(); for (int i = 0; i < w_perSubdataset.length; i++) { double weight = instance.weight() * weights[i]; w_perClassPerSubdataset[i][classIndex] += weight; w_perSubdataset[i] += weight; w_perClass[classIndex] += weight; totalWeights += weight; } }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Moves the instances in the half-open range [startIndex, lastIndex) from
 * one subdataset to another by transferring their weight. The per-class and
 * total weights are untouched because the instances remain in the
 * distribution.
 *
 * @param from       index of the subdataset the weight leaves
 * @param to         index of the subdataset the weight joins
 * @param dataSet    the dataset supplying the instances
 * @param startIndex first index to move (inclusive)
 * @param lastIndex  end of the range (exclusive)
 * @exception Exception if something goes wrong
 */
public void moveInstancesWithRange(int from, int to, Instances dataSet, int startIndex, int lastIndex)
        throws Exception {
    for (int i = startIndex; i < lastIndex; i++) {
        Instance inst = (Instance) dataSet.instance(i);
        int cls = (int) inst.classValue();
        double w = inst.weight();
        w_perClassPerSubdataset[from][cls] -= w;
        w_perClassPerSubdataset[to][cls] += w;
        w_perSubdataset[from] -= w;
        w_perSubdataset[to] += w;
    }
}
From source file:myID3.MyId3.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//w w w. ja va 2 s . c o m */ public void buildTree(Instances data) { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyId3[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyId3(); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myID3.MyId3.java
/** * Find the entropy from a given dataset * @param data//from w ww . j a va 2 s .co m * @return */ private double entropy(Instances data) { /* Entropy = -(p1 log2 p1) -(p2 log2 p2).... */ double numInstance = data.numInstances(); double numClass = data.numClasses(); double[] distribution = new double[data.numClasses()]; Enumeration instance = data.enumerateInstances(); while (instance.hasMoreElements()) { Instance temp = (Instance) instance.nextElement(); /* Count the p1, p2 */ distribution[(int) temp.classValue()]++; } /* Sum all the distribution */ double sum = 0; for (int i = 0; i < numClass; i++) { distribution[i] = distribution[i] / numInstance; if (distribution[i] > 0.0) distribution[i] *= Utils.log2(distribution[i]); // System.out.println(Arrays.toString(distribution)); sum += distribution[i]; } return -1 * sum; }