List of usage examples for weka.core.Instance.isMissing
// Signature of the method whose usages are collected in the examples below.
public boolean isMissing(Attribute att);
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNumericAttribute(Instances dataSet) throws Exception { int firstMiss; int next = 1; int last = 0; int splitIndex = -1; double currentInfoGain; double currentGainRatio; double minSplit; Instance instance; int i;/*from ww w. j ava 2s.c o m*/ boolean instanceMissing = false; // Current attribute is a numeric attribute. classDist = new ClassDistribution(2, dataSet.numClasses()); // Only Instances with known values are relevant. Enumeration instanceEnum = dataSet.enumerateInstances(); i = 0; while ((instanceEnum.hasMoreElements() && (!instanceMissing))) { instance = (Instance) instanceEnum.nextElement(); if (instance.isMissing(attribIndex)) { instanceMissing = true; } else { classDist.addInstance(1, instance); i++; } } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (classDist.getTotalWeight()) / ((double) dataSet.numClasses()); if (minSplit <= minInstances) { minSplit = minInstances; } else if (minSplit > 25) { minSplit = 25; } // Enough Instances with known values? if ((double) firstMiss < 2 * minSplit) { return; } // Compute values of criteria for all possible split // indices. //defaultEnt = infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (dataSet.instance(next - 1).value(attribIndex) + 1e-5 < dataSet.instance(next).value(attribIndex)) { // Move class values for all Instances up to next // possible split point. classDist.moveInstancesWithRange(1, 0, dataSet, last, next); // Check if enough Instances in each subset and compute // values for criteria. if ((classDist.w_perSubdataset[0] >= minSplit) && (classDist.w_perSubdataset[1] >= minSplit)) { currentInfoGain = classDist.calculateInfoGain(totalWeights); currentGainRatio = classDist.calculateGainRatio(totalWeights); if (currentGainRatio >= gainRatio) { infoGain = currentInfoGain; gainRatio = currentGainRatio; splitIndex = next - 1; } numOfSplitPoints++; } last = next; } next++; } // Was there any useful split? 
if (numOfSplitPoints == 0) { return; } // Compute modified information gain for best split. infoGain = infoGain - (classDist.log2(numOfSplitPoints) / totalWeights); if (infoGain > 0) { // Set instance variables' values to values for // best split. numSubsets = 2; splitPointValue = (dataSet.instance(splitIndex + 1).value(attribIndex) + dataSet.instance(splitIndex).value(attribIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (splitPointValue == dataSet.instance(splitIndex + 1).value(attribIndex)) { splitPointValue = dataSet.instance(splitIndex).value(attribIndex); } // Restore distributioN for best split. classDist = new ClassDistribution(2, dataSet.numClasses()); classDist.addRange(0, dataSet, 0, splitIndex + 1); classDist.addRange(1, dataSet, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. gainRatio = classDist.calculateGainRatio(infoGain); } }
From source file:myclassifier.myC45Pack.SplitModel.java
/**
 * Moves {@code splitPointValue} onto the largest attribute value in
 * {@code allInstances} that does not exceed the current split point.
 *
 * <p>Does nothing unless the attribute at {@code attribIndex} is numeric and
 * the model has more than one subset. If no instance qualifies, the split
 * point becomes {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the instances whose values are scanned
 */
public final void setSplitPoint(Instances allInstances) {
    if (!allInstances.attribute(attribIndex).isNumeric() || numSubsets <= 1) {
        return;
    }

    double largestBelow = -Double.MAX_VALUE;
    for (Enumeration all = allInstances.enumerateInstances(); all.hasMoreElements();) {
        Instance candidate = (Instance) all.nextElement();
        if (candidate.isMissing(attribIndex)) {
            continue;
        }
        double value = candidate.value(attribIndex);
        if ((value > largestBelow) && (value <= splitPointValue)) {
            largestBelow = value;
        }
    }
    splitPointValue = largestBelow;
}
From source file:myclassifier.myC45Pack.SplitModel.java
/**
 * Returns the share of the total weight held by each subset, for an instance
 * whose value of the split attribute is missing.
 *
 * @param instance the instance to distribute
 * @return one weight per subset, or {@code null} when the instance has a
 *         known value for the attribute at {@code attribIndex}
 */
@Override
public double[] getWeights(Instance instance) {
    if (!instance.isMissing(attribIndex)) {
        return null;
    }

    double[] shares = new double[numSubsets];
    for (int subset = 0; subset < numSubsets; subset++) {
        shares[subset] = classDist.w_perSubdataset[subset] / classDist.getTotalWeight();
    }
    return shares;
}
From source file:myclassifier.myC45Pack.SplitModel.java
/**
 * Determines which subset an instance falls into under this split.
 *
 * @param instance the instance to route
 * @return {@code -1} if the split attribute is missing; the value index for a
 *         nominal attribute; otherwise {@code 0} when the value is at most
 *         {@code splitPointValue} and {@code 1} when it is greater
 * @throws Exception declared by the overridden method
 */
@Override
public int getSubsetIndex(Instance instance) throws Exception {
    if (instance.isMissing(attribIndex)) {
        return -1;
    }
    if (instance.attribute(attribIndex).isNominal()) {
        return (int) instance.value(attribIndex);
    }
    return instance.value(attribIndex) <= splitPointValue ? 0 : 1;
}
From source file:myclassifier.Util.java
public static double calculateIG(Instances instances, Attribute attribute) { double IG = calculateE(instances); int missingCount = 0; Instances[] splitData = splitData(instances, attribute); for (int j = 0; j < attribute.numValues(); j++) { if (splitData[j].numInstances() > 0) { IG -= ((double) splitData[j].numInstances() / (double) instances.numInstances()) * calculateE(splitData[j]); }// w ww .jav a2 s. co m } for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); if (instance.isMissing(attribute)) missingCount++; } return IG * (instances.numInstances() - missingCount / instances.numInstances()); }
From source file:myclusterer.MyKMeans.java
private Instance createCentroid(Instances members) { double[] vals = new double[members.numAttributes()]; double[][] nominalDists = new double[members.numAttributes()][]; double[] weightMissing = new double[members.numAttributes()]; double[] weightNonMissing = new double[members.numAttributes()]; for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNominal()) { nominalDists[j] = new double[members.attribute(j).numValues()]; }//from www .ja v a 2 s .com } for (int i = 0; i < members.numInstances(); ++i) { Instance inst = members.instance(i); for (int j = 0; j < members.numAttributes(); j++) { if (inst.isMissing(j)) { weightMissing[j] += inst.weight(); } else { weightNonMissing[j] += inst.weight(); if (members.attribute(j).isNumeric()) vals[j] += inst.weight() * inst.value(j); else nominalDists[j][(int) inst.value(j)] += inst.weight(); } } } for (int i = 0; i < members.numAttributes(); i++) { if (members.attribute(i).isNumeric()) { if (weightNonMissing[i] > 0) { vals[i] /= weightNonMissing[i]; } else { vals[i] = Instance.missingValue(); } } else { double max = -Double.MAX_VALUE; double maxIndex = -1; for (int j = 0; j < nominalDists[i].length; j++) { if (nominalDists[i][j] > max) { max = nominalDists[i][j]; maxIndex = j; } vals[i] = max < weightMissing[i] ? Instance.missingValue() : maxIndex; } } } return new Instance(1.0, vals); }
From source file:myid3andc45classifier.Model.MyC45.java
@Override public void buildClassifier(Instances data) throws Exception { getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass();/*from w ww . j ava 2 s.c om*/ Enumeration enumAtt = data.enumerateAttributes(); while (enumAtt.hasMoreElements()) { Attribute attr = (Attribute) enumAtt.nextElement(); if (attr.isNumeric()) { ArrayList<Double> mid = new ArrayList<Double>(); Instances savedData = null; double temp, max = Double.NEGATIVE_INFINITY; // TODO: split nominal data.sort(attr); for (int i = 0; i < data.numInstances() - 1; i++) { if (data.instance(i).classValue() != data.instance(i + 1).classValue()) { if (data.attribute(attr.name() + " " + (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2) == null) { data = convertInstances(data, attr, (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2); //temp = computeInfoGainRatio(newData, newData.attribute(newData.numAttributes()-1)); //System.out.println("attribute "+newData.attribute(newData.numAttributes()-1).name()); //if (temp > max) { // max = temp; // savedData = newData; //} } } } //Penanganan Missing Value AttributeStats attributeStats = data.attributeStats(attr.index()); double mean = attributeStats.numericStats.mean; if (Double.isNaN(mean)) mean = 0; // Replace missing value with mean Enumeration instEnumerate = data.enumerateInstances(); while (instEnumerate.hasMoreElements()) { Instance instance = (Instance) instEnumerate.nextElement(); if (instance.isMissing(attr.index())) { instance.setValue(attr.index(), mean); } } //data = new Instances(savedData); } else { //Penanganan Missing Value AttributeStats attributeStats = data.attributeStats(attr.index()); int maxIndex = 0; for (int i = 1; i < attr.numValues(); i++) { if (attributeStats.nominalCounts[maxIndex] < attributeStats.nominalCounts[i]) { maxIndex = i; } } // Replace missing value with max index Enumeration instEnumerate = data.enumerateInstances(); while 
(instEnumerate.hasMoreElements()) { Instance instance = (Instance) instEnumerate.nextElement(); if (instance.isMissing(attr.index())) { instance.setValue(attr.index(), maxIndex); } } } } makeMyC45Tree(data); }
From source file:naivebayes.NBTubesAI.java
@Override public double classifyInstance(Instance instance) throws Exception { int jumlahKelas = instance.classAttribute().numValues(); double[] classifyResult = new double[jumlahKelas]; //iterasi menghitung probabilitas untuk seluruh kelas for (int i = 0; i < jumlahKelas; i++) { //Rumus probabilitas Naive Bayes here classifyResult[i] = (double) classCount.get(i + 0.0) / numInstance; Enumeration<Attribute> enumAttr = instance.enumerateAttributes(); while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); if (!instance.isMissing(temp)) { try { classifyResult[i] = classifyResult[i] * distribution.get(temp.name()).get(instance.stringValue(temp)).get(i + 0.0); } catch (NullPointerException e) { classifyResult[i] = 0; }/*from w w w .j av a 2 s . c o m*/ } } } double maxValue = 0; int currentIndex = 0; for (int i = 0; i < jumlahKelas; i++) { if (maxValue < classifyResult[i]) { currentIndex = i; maxValue = classifyResult[i]; } } return currentIndex; }
From source file:naivebayes.NBTubesAI.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { int jumlahKelas = instance.classAttribute().numValues(); double[] classifyResult = new double[jumlahKelas]; //iterasi menghitung probabilitas untuk seluruh kelas for (int i = 0; i < jumlahKelas; i++) { //Rumus probabilitas Naive Bayes here classifyResult[i] = (double) classCount.get(i + 0.0) / numInstance; Enumeration<Attribute> enumAttr = instance.enumerateAttributes(); while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); if (!instance.isMissing(temp)) { try { classifyResult[i] = classifyResult[i] * distribution.get(temp.name()).get(instance.stringValue(temp)).get(i + 0.0); } catch (NullPointerException e) { }// w w w . j av a2s . c o m } } } return classifyResult; }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Classifies a given test instance using the decision tree. * /*from w w w . ja va 2 s.c om*/ * @param instance * the instance to be classified * @return the classification */ public double classifyInstance(Instance instance) { if (m_Attribute == null) { return m_ClassValue; } else if (instance.isMissing(m_Attribute)) { try { // Use superclass implementation, which uses distributionForInstance. return super.classifyInstance(instance); } catch (Exception x) { x.printStackTrace(); Assert.UNREACHABLE(); return 0.; } } else { return m_Successors[(int) instance.value(m_Attribute)].classifyInstance(instance); } }