List of usage examples for weka.core Instance classValue
public double classValue();
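For a nominal class attribute, classValue() returns the zero-based index of the instance's class label encoded as a double, which is why the examples below routinely cast the result to int before using it; for a numeric class it returns the target value itself. A minimal sketch of the call, assuming the older Weka 3 API used by the examples below; the file name "weather.nominal.arff" and the class name ClassValueDemo are illustrative, not taken from any example on this page:

import java.io.FileReader;

import weka.core.Instance;
import weka.core.Instances;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; classValue() requires the class attribute to be set.
        Instances data = new Instances(new FileReader("weather.nominal.arff"));
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            // For a nominal class, classValue() returns the label's index
            // as a double; classAttribute().value() maps it back to a name.
            double cv = inst.classValue();
            System.out.println(cv + " = " + inst.classAttribute().value((int) cv));
        }
    }
}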
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        double sum = 0;
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    boolean makeLeaf;
    makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0);
    Instances[] splitData = null;
    if (!makeLeaf) {
        splitData = splitData(data, m_Attribute);
        for (int i = 0; i < splitData.length; ++i) {
            if (splitData[i].numInstances() == data.numInstances()) {
                // System.out.println("When splitting on attrib "+m_Attribute+
                //     ", child "+i+" is same size as current, making into leaf.");
                makeLeaf = true;
                break;
            }
        }
    }
    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (makeLeaf) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        double sum = 0;
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
            sum += inst.weight();
        }
        // Laplace smooth the distribution instead of normalizing.
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        //Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        m_Successors = new MyId3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyId3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Computes the entropy of the data's class distribution, ignoring
 * instances whose value for the given attribute is missing.
 *
 * @param data the data for which entropy is to be computed
 * @param att the attribute whose missing values are skipped
 * @return the entropy of the data's class distribution
 */
private double computeEntropy(Instances data, Attribute att) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    int numInstances = 0;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att))
            continue;
        classCounts[(int) inst.classValue()]++;
        ++numInstances;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) numInstances;
    return entropy + Utils.log2(numInstances);
}
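A note on the last two lines: with class counts c_j over n non-missing instances, the entropy -sum_j (c_j/n) * log2(c_j/n) expands to log2(n) - (1/n) * sum_j c_j * log2(c_j), so the loop can accumulate c_j * log2(c_j) directly and the method recovers the entropy by dividing by n and adding log2(n), avoiding a separate normalization pass over the counts.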
From source file:net.sf.bddbddb.order.WekaInterface.java
License:LGPL
public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; // more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; // no instances
    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");
        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");
        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        // Each fold's estimate is the weighted accuracy (1 - weighted error).
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];
    return average / numFolds;
}
From source file:net.sf.jclal.sampling.supervised.Resample.java
License:Open Source License
/**
 * @param dataSet The dataset from which to extract the instances.
 */
@Override
public void sampling(IDataset dataSet) {
    if (!(dataSet instanceof WekaDataset)) {
        throw new RuntimeException("This sample method only can be used with a single label weka dataset");
    }
    WekaDataset wekaDataSet = (WekaDataset) dataSet;
    int origSize = wekaDataSet.getNumInstances();
    int sampleSize = (int) (origSize * getPercentageInstancesToLabelled() / 100);
    // Subsample that takes class distribution into consideration.
    // Sort according to class attribute.
    wekaDataSet.getDataset().sort(wekaDataSet.getClassIndex());
    // Create an index of where each class value starts.
    int[] classIndices = new int[wekaDataSet.getNumClasses() + 1];
    int currentClass = 0;
    classIndices[currentClass] = 0;
    for (int i = 0; i < dataSet.getNumInstances(); i++) {
        Instance current = dataSet.instance(i);
        if (current.classIsMissing()) {
            for (int j = currentClass + 1; j < classIndices.length; j++) {
                classIndices[j] = i;
            }
            break;
        } else if (current.classValue() != currentClass) {
            for (int j = currentClass + 1; j <= current.classValue(); j++) {
                classIndices[j] = i;
            }
            currentClass = (int) current.classValue();
        }
    }
    if (currentClass <= wekaDataSet.getNumClasses()) {
        for (int j = currentClass + 1; j < classIndices.length; j++) {
            classIndices[j] = dataSet.getNumInstances();
        }
    }
    int actualClasses = 0;
    for (int i = 0; i < classIndices.length - 1; i++) {
        if (classIndices[i] != classIndices[i + 1]) {
            actualClasses++;
        }
    }
    // Draw the subsample with or without replacement.
    if (isNoReplacement()) {
        createSubsampleWithoutReplacement(wekaDataSet, sampleSize, actualClasses, classIndices);
    } else {
        createSubsampleWithReplacement(wekaDataSet, sampleSize, actualClasses, classIndices);
    }
}
From source file:NeuralNetwork.Network.java
public void learn(Instance instance) {
    // Set up the first layer (input layer).
    layers.get(0).setFirstLayer(instance);
    // Forward propagate.
    forwardPropagate();
    // Backward propagate, starting from the output layer.
    layers.get(layers.size() - 1).backwardPropagateOutputLayer((int) instance.classValue());
    // Backward propagate all other layers (except the first, since it holds inputs only).
    for (int i = layers.size() - 2; i > 0; i--) {
        layers.get(i).backwardPropagate();
    }
    // Update the weights of all layers (except the first, since it holds inputs only).
    for (int i = 1; i < layers.size(); i++)
        layers.get(i).updateWeights();
}
From source file:Neural_Network.NuralN.java
public static ArrayList<Double> predictDisease(String[] instanceData) {
    ArrayList<Double> predictions = new ArrayList<>();
    if (!trained) {
        System.err.println("Neural network is not trained....");
    } else {
        Instance temp = toInstance(instanceData);
        try {
            // Give the instance the predicted class value.
            temp.setClassValue(nN.classifyInstance(temp));
            // Collect the class distribution for the instance.
            for (double d : nN.distributionForInstance(temp)) {
                predictions.add(d);
            }
            // Append the predicted class value last.
            predictions.add(temp.classValue());
        } catch (Exception e) {
            System.err.println(e.toString());
        }
    }
    return predictions;
}
From source file:newdtl.NewID3.java
/**
 * Creates an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {
    // Check whether there are no instances in this node.
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
    } else {
        // Find the attribute with maximum information gain.
        double[] infoGains = new double[data.numAttributes()];
        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            infoGains[att.index()] = computeInfoGain(data, att);
        }
        // Check the maximum information gain.
        int maxIG = maxIndex(infoGains);
        if (maxIG != -1) {
            splitAttribute = data.attribute(maxIndex(infoGains));
        } else {
            throw new Exception("array null");
        }
        // Make a leaf if the information gain is 0.
        if (Double.compare(infoGains[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;
            classDistributions = new double[data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = (Instance) data.instance(i);
                classDistributions[(int) inst.classValue()]++;
            }
            normalizeClassDistribution();
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
        } else {
            // Build new subtrees below this node.
            Instances[] splitData = splitData(data, splitAttribute);
            children = new NewID3[splitAttribute.numValues()];
            for (int j = 0; j < splitAttribute.numValues(); j++) {
                children[j] = new NewID3();
                children[j].makeTree(splitData[j]);
            }
        }
    }
}
From source file:newdtl.NewJ48.java
/**
 * Creates a J48 tree.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {
    // Check whether there are no instances in this node.
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Find the attribute with maximum gain ratio.
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];
        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }
        splitAttribute = data.attribute(maxIndex(gainRatios));
        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }
        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }
        // Make a leaf if the gain ratio is 0.
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Check for missing values.
            if (isMissing(data, splitAttribute)) {
                // Find the mode of the split attribute.
                int index = modusIndex(data, splitAttribute);
                // Replace missing values with the mode.
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }
            // Build new subtrees below this node.
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}
From source file:org.esa.nest.gpf.SGD.java
/**
 * Updates the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the model.
 */
@Override
public void updateClassifier(Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        double wx = dotProd(instance, m_weights, instance.classIndex());
        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            // Map the two nominal class indices {0, 1} to targets {-1, +1}.
            y = (instance.classValue() == 0) ? -1 : 1;
            z = y * (wx + m_weights[m_weights.length - 1]);
        } else {
            // Numeric class: classValue() is the regression target.
            y = instance.classValue();
            z = y - (wx + m_weights[m_weights.length - 1]);
            y = 1;
        }
        // Compute multiplier for weight decay.
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights.length - 1; i++) {
            m_weights[i] *= multiplier;
        }
        // Only need to do the following if the loss is non-zero.
        if (m_loss != HINGE || (z < 1)) {
            // Compute factor for updates.
            double factor = m_learningRate * y * dloss(z);
            // Update coefficients for attributes.
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights[indS] += factor * instance.valueSparse(p1);
                }
            }
            // Update the bias.
            m_weights[m_weights.length - 1] += factor;
        }
        m_t++;
    }
}
From source file:org.if4071.myann.PerceptronTrainingRule.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);
    dataSet = new Instances(data);
    for (int i = 0; i < dataSet.numAttributes(); i++) {
        dataSet.deleteWithMissing(i);
    }
    // Filter nominal attributes to binary, then normalize.
    nomToBinFilter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, nomToBinFilter);
    normalizeFilter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, normalizeFilter);
    // Set up the topology: one input node per attribute, one output node.
    topology.addInputLayer(dataSet.numAttributes() - 1);
    topology.addOutputLayer(1);
    topology.connectNodes();
    Node outputNode = topology.getOutputNode(0);
    outputNode.setBiasWeight(0.0);
    double threshold = 0.0;
    if (topology.isUseErrorThresholdTerminate()) {
        threshold = topology.getErrorThreshold();
    }
    int epochError;
    int loop = 0;
    do {
        epochError = 0;
        int target;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            Instance instance = dataSet.instance(i);
            // Map the class value {0, 1} to a bipolar target {-1, +1}.
            if (instance.classValue() == 1) {
                target = 1;
            } else {
                target = -1;
            }
            for (int j = 0; j < topology.getWeights().size(); j++) {
                Weight weight = topology.getWeights().get(j);
                weight.getNode2().setInput(
                        weight.getNode2().getInput() + (weight.getNode1().getOutput() * weight.getWeight()));
            }
            outputNode.setInput(outputNode.getInput() + (outputNode.getBias() * outputNode.getBiasWeight()));
            int output = Node.sign(outputNode.getInput());
            for (int j = 0; j < topology.getWeights().size(); j++) {
                Weight weight = topology.getWeights().get(j);
                double delta = (topology.getLearningRate() * (target - output) * outputNode.getBias())
                        + topology.getMomentumRate() * outputNode.getPrevDeltaWeight();
                weight.setPrevDeltaWeight(delta);
                weight.setWeight(weight.getWeight() + delta);
            }
            double biasWeight = outputNode.getBiasWeight();
            double delta = (topology.getLearningRate() * (target - output) * outputNode.getBias())
                    + topology.getMomentumRate() * outputNode.getPrevDeltaWeight();
            outputNode.setPrevDeltaWeight(delta);
            outputNode.setBiasWeight(biasWeight + delta);
            topology.resetNodesInput();
            for (int j = 0; j < topology.getWeights().size(); j++) {
                Weight weight = topology.getWeights().get(j);
                weight.getNode2().setInput(
                        weight.getNode2().getInput() + (weight.getNode1().getOutput() * weight.getWeight()));
            }
            outputNode.setInput(outputNode.getInput() + (outputNode.getBias() * outputNode.getBiasWeight()));
            output = Node.sign(outputNode.getInput());
            int squaredError = (output - target) * (output - target);
            epochError += squaredError;
        }
        epochError = epochError / 2;
        loop++;
    } while ((epochError > threshold)
            && (!topology.isUseIterationTerminate() || (loop < topology.getIterationNumber())));
}