List of usage examples for weka.core.Instance.isMissing(Attribute)
/** Reports whether this instance's value for the given attribute is missing (as used by the examples below). */
public boolean isMissing(Attribute att);
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Computes class distribution for instance using decision tree. * /*www .j a v a2 s . com*/ * @param instance * the instance for which distribution is to be computed * @return the class distribution for the given instance */ public double[] distributionForInstance(Instance instance) throws NoSupportForMissingValuesException { if (m_Attribute == null) { return m_Distribution; } else if (instance.isMissing(m_Attribute)) { double[] d = new double[0]; for (int i = 0; i < m_Successors.length; ++i) { double[] dd = m_Successors[i].distributionForInstance(instance); if (d.length == 0 && dd.length > 0) d = new double[dd.length]; for (int j = 0; j < d.length; ++j) { d[j] += dd[j]; } } for (int j = 0; j < d.length; ++j) { d[j] /= m_Successors.length; } return d; } else { return m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance); } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Computes the entropy of a dataset./*from ww w . jav a2s. c o m*/ * * @param data * the data for which entropy is to be computed * @return the entropy of the data's class distribution */ private double computeEntropy(Instances data, Attribute att) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); int numInstances = 0; while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) continue; classCounts[(int) inst.classValue()]++; ++numInstances; } double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) numInstances; return entropy + Utils.log2(numInstances); }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Splits a dataset according to the values of a nominal attribute. * /*from www.j a v a 2 s.co m*/ * @param data * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { numI = 0; splitDataSize = new int[att.numValues()]; Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { // Add to all children. for (int k = 0; k < att.numValues(); ++k) { splitData[k].add(inst); } } else { int k = (int) inst.value(att); splitData[k].add(inst); splitDataSize[k]++; numI++; } } return splitData; }
From source file:newdtl.NewJ48.java
/** * Creates a J48 tree./*from w ww . j a v a 2s . com*/ * * @param data the training data * @exception Exception if tree failed to build */ private void makeTree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance dalam node ini if (data.numInstances() == 0) { splitAttribute = null; label = DOUBLE_MISSING_VALUE; classDistributions = new double[data.numClasses()]; isLeaf = true; } else { // Mencari Gain Ratio maksimum double[] gainRatios = new double[data.numAttributes()]; double[] thresholds = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); double[] result = computeGainRatio(data, att); gainRatios[att.index()] = result[0]; thresholds[att.index()] = result[1]; } splitAttribute = data.attribute(maxIndex(gainRatios)); if (splitAttribute.isNumeric()) { splitThreshold = thresholds[maxIndex(gainRatios)]; } else { splitThreshold = Double.NaN; } classDistributions = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); classDistributions[(int) inst.classValue()]++; } // Membuat daun jika Gain Ratio-nya 0 if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) { splitAttribute = null; label = maxIndex(classDistributions); classAttribute = data.classAttribute(); isLeaf = true; } else { // Mengecek jika ada missing value if (isMissing(data, splitAttribute)) { // cari modus int index = modusIndex(data, splitAttribute); // ubah data yang punya missing value Enumeration dataEnum = data.enumerateInstances(); while (dataEnum.hasMoreElements()) { Instance inst = (Instance) dataEnum.nextElement(); if (inst.isMissing(splitAttribute)) { inst.setValue(splitAttribute, splitAttribute.value(index)); } } } // Membuat tree baru di bawah node ini Instances[] splitData; if (splitAttribute.isNumeric()) { splitData = splitData(data, splitAttribute, splitThreshold); 
children = new NewJ48[2]; for (int j = 0; j < 2; j++) { children[j] = new NewJ48(); children[j].makeTree(splitData[j]); } } else { splitData = splitData(data, splitAttribute); children = new NewJ48[splitAttribute.numValues()]; for (int j = 0; j < splitAttribute.numValues(); j++) { children[j] = new NewJ48(); children[j].makeTree(splitData[j]); } } isLeaf = false; } } }
From source file:newdtl.NewJ48.java
/** * search data that has missing value for attribute * * @param data the data for searching/* w w w. j a va2 s .c o m*/ * @param att the attribute for searching * @return if data has missing value for attribute */ private boolean isMissing(Instances data, Attribute att) { boolean isMissingValue = false; Enumeration dataEnum = data.enumerateInstances(); while (dataEnum.hasMoreElements() && !isMissingValue) { Instance inst = (Instance) dataEnum.nextElement(); if (inst.isMissing(att)) { isMissingValue = true; } } return isMissingValue; }
From source file:newdtl.NewJ48.java
/** * search index of attribute that has most common value * * @param data the data for searching/* ww w. j a va2 s . com*/ * @param att the attribute for searching * @return index of attribute that has most common value */ private int modusIndex(Instances data, Attribute att) { // cari modus int[] modus = new int[att.numValues()]; Enumeration dataEnumeration = data.enumerateInstances(); while (dataEnumeration.hasMoreElements()) { Instance inst = (Instance) dataEnumeration.nextElement(); if (!inst.isMissing(att)) { modus[(int) inst.value(att)]++; } } // cari modus terbesar int indexMax = 0; for (int i = 1; i < modus.length; ++i) { if (modus[i] > modus[indexMax]) { indexMax = i; } } return indexMax; }
From source file:org.openml.webapplication.fantail.dc.statistical.IncompleteInstanceCount.java
License:Open Source License
@Override public Map<String, Double> characterize(Instances instances) { // Get default class final int instance_count = instances.numInstances(), attrib_count = instances.numAttributes(); int count = 0; for (int i = 0; i < instance_count; i++) { Instance instance = instances.instance(i); for (int j = 0; j < attrib_count; j++) { if (instance.isMissing(j)) { count++;/*from w w w. ja v a 2 s. co m*/ break; } } } Map<String, Double> qualities = new HashMap<String, Double>(); qualities.put(ids[0], (double) count); return qualities; }
From source file:org.openml.webapplication.fantail.dc.statistical.MissingValues.java
License:Open Source License
/**
 * Counts the missing values in a dataset and the fraction of all cells that are missing.
 *
 * <p>The count is stored under the first entry of {@code ids} and the fraction under the
 * second. For a dataset without any cells the fraction is NaN (0 divided by 0).
 *
 * @param instances the dataset to inspect
 * @return a map holding the missing-value count and the missing-value fraction
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    final int instanceCount = instances.numInstances();
    final int attribCount = instances.numAttributes();

    // long: the number of cells, and so of missing values, can exceed Integer.MAX_VALUE.
    long missing = 0;
    for (int i = 0; i < instanceCount; i++) {
        Instance instance = instances.instance(i);
        for (int j = 0; j < attribCount; j++) {
            if (instance.isMissing(j)) {
                missing++;
            }
        }
    }

    // Multiply in double so that attributes * instances cannot overflow int.
    final double totalCells = (double) attribCount * instanceCount;

    Map<String, Double> qualities = new HashMap<String, Double>();
    qualities.put(ids[0], (double) missing);
    qualities.put(ids[1], missing / totalCells);
    return qualities;
}
From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java
License:Open Source License
private static double findKurtosis(Instances instances, double mean, double stddev, int attrib) { final double S4 = Math.pow(stddev, 4), YBAR = mean; double sum = 0.0; final int COUNT = instances.numInstances(); int n = 0;//from w w w .j av a 2s .com if (S4 == 0) { return 0; } for (int i = 0; i < COUNT; i++) { Instance instance = instances.instance(i); if (!instance.isMissing(attrib)) { n++; sum += Math.pow(instance.value(attrib) - YBAR, 4); } } return (sum / ((n - 1) * S4)) - 3; }
From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java
License:Open Source License
private static double findSkewness(Instances instances, double mean, double stddev, int attrib) { final double S3 = Math.pow(stddev, 3), YBAR = mean; double sum = 0.0; final int COUNT = instances.numInstances(); int n = 0;// w w w. j a va2 s.c o m if (S3 == 0) { return 0; } for (int i = 0; i < COUNT; i++) { Instance instance = instances.instance(i); if (!instance.isMissing(attrib)) { n++; sum += Math.pow(instance.value(attrib) - YBAR, 3); } } return (sum / ((n - 1) * S3)); }