Example usage for weka.core Instance isMissing

List of usage examples for weka.core Instance isMissing

Introduction

On this page you can find example usages of the isMissing method of weka.core.Instance.

Prototype

public boolean isMissing(Attribute att);

Source Link

Document

Tests if a specific value is "missing".

Usage

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Computes class distribution for instance using decision tree.
 * /*www .j  a v  a2  s  . com*/
 * @param instance
 *            the instance for which distribution is to be computed
 * @return the class distribution for the given instance
 */
public double[] distributionForInstance(Instance instance) throws NoSupportForMissingValuesException {
    if (m_Attribute == null) {
        return m_Distribution;
    } else if (instance.isMissing(m_Attribute)) {
        double[] d = new double[0];
        for (int i = 0; i < m_Successors.length; ++i) {
            double[] dd = m_Successors[i].distributionForInstance(instance);
            if (d.length == 0 && dd.length > 0)
                d = new double[dd.length];
            for (int j = 0; j < d.length; ++j) {
                d[j] += dd[j];
            }
        }
        for (int j = 0; j < d.length; ++j) {
            d[j] /= m_Successors.length;
        }
        return d;
    } else {
        return m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance);
    }
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Computes the entropy of a dataset./*from ww w .  jav  a2s. c o m*/
 * 
 * @param data
 *            the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 */
private double computeEntropy(Instances data, Attribute att) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    int numInstances = 0;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att))
            continue;
        classCounts[(int) inst.classValue()]++;
        ++numInstances;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) numInstances;
    return entropy + Utils.log2(numInstances);
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Splits a dataset according to the values of a nominal attribute.
 * /*from www.j  a v a 2  s.co m*/
 * @param data
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    numI = 0;
    splitDataSize = new int[att.numValues()];
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att)) {
            // Add to all children.
            for (int k = 0; k < att.numValues(); ++k) {
                splitData[k].add(inst);
            }
        } else {
            int k = (int) inst.value(att);
            splitData[k].add(inst);
            splitDataSize[k]++;
            numI++;
        }
    }
    return splitData;
}

From source file:newdtl.NewJ48.java

/**
 * Creates a J48 tree./*from w ww  .  j a  v a  2s  .  com*/
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {

    // Mengecek apakah tidak terdapat instance dalam node ini
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Mencari Gain Ratio maksimum
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];

        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }

        splitAttribute = data.attribute(maxIndex(gainRatios));

        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }

        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }

        // Membuat daun jika Gain Ratio-nya 0
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;

            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Mengecek jika ada missing value
            if (isMissing(data, splitAttribute)) {
                // cari modus
                int index = modusIndex(data, splitAttribute);

                // ubah data yang punya missing value
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }

            // Membuat tree baru di bawah node ini
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}

From source file:newdtl.NewJ48.java

/**
 * search data that has missing value for attribute
 *
 * @param data the data for searching/*  w  w  w. j a va2 s  .c o m*/
 * @param att the attribute for searching
 * @return if data has missing value for attribute
 */
private boolean isMissing(Instances data, Attribute att) {

    boolean isMissingValue = false;
    Enumeration dataEnum = data.enumerateInstances();

    while (dataEnum.hasMoreElements() && !isMissingValue) {
        Instance inst = (Instance) dataEnum.nextElement();
        if (inst.isMissing(att)) {
            isMissingValue = true;
        }
    }

    return isMissingValue;
}

From source file:newdtl.NewJ48.java

/**
 * search index of attribute that has most common value
 *
 * @param data the data for searching/*  ww w. j  a  va2  s  . com*/
 * @param att the attribute for searching
 * @return index of attribute that has most common value
 */
private int modusIndex(Instances data, Attribute att) {
    // cari modus
    int[] modus = new int[att.numValues()];
    Enumeration dataEnumeration = data.enumerateInstances();

    while (dataEnumeration.hasMoreElements()) {
        Instance inst = (Instance) dataEnumeration.nextElement();
        if (!inst.isMissing(att)) {
            modus[(int) inst.value(att)]++;
        }
    }

    // cari modus terbesar
    int indexMax = 0;
    for (int i = 1; i < modus.length; ++i) {
        if (modus[i] > modus[indexMax]) {
            indexMax = i;
        }
    }
    return indexMax;
}

From source file:org.openml.webapplication.fantail.dc.statistical.IncompleteInstanceCount.java

License:Open Source License

/**
 * Counts the instances that have at least one missing attribute value.
 *
 * @param instances the dataset to inspect
 * @return a map holding the count (as a double) under the key {@code ids[0]}
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    final int numRows = instances.numInstances();
    final int numCols = instances.numAttributes();

    int incomplete = 0;
    for (int row = 0; row < numRows; row++) {
        Instance current = instances.instance(row);

        // Advance to the first missing attribute, if there is one.
        int col = 0;
        while (col < numCols && !current.isMissing(col)) {
            col++;
        }
        if (col < numCols) {
            incomplete++;
        }
    }

    Map<String, Double> result = new HashMap<String, Double>();
    result.put(ids[0], (double) incomplete);
    return result;
}

From source file:org.openml.webapplication.fantail.dc.statistical.MissingValues.java

License:Open Source License

/**
 * Counts the missing attribute values in a dataset and their share of all cells.
 *
 * @param instances the dataset to inspect
 * @return a map holding the number of missing values under {@code ids[0]} and
 *         that number divided by (attributes x instances) under {@code ids[1]};
 *         the ratio is NaN when the dataset has no cells
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    final int instance_count = instances.numInstances(), attrib_count = instances.numAttributes();
    // long: a large dataset can hold more missing cells than an int can count.
    long count = 0;

    for (int i = 0; i < instance_count; i++) {
        Instance instance = instances.instance(i);

        for (int j = 0; j < attrib_count; j++) {
            if (instance.isMissing(j)) {
                count++;
            }
        }
    }

    // Multiply in floating point: the int product attributes * instances
    // overflows once the dataset has more than Integer.MAX_VALUE cells.
    final double cells = (double) attrib_count * instance_count;

    Map<String, Double> qualities = new HashMap<String, Double>();
    qualities.put(ids[0], (double) count);
    qualities.put(ids[1], count / cells);
    return qualities;
}

From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java

License:Open Source License

/**
 * Computes {@code sum((x - mean)^4) / ((n - 1) * stddev^4) - 3} over the
 * non-missing values of one attribute, where {@code n} is the number of
 * non-missing values.
 *
 * @param instances the dataset to read values from
 * @param mean the mean to centre the values on
 * @param stddev the standard deviation used for scaling
 * @param attrib index of the attribute to evaluate
 * @return the value of the formula above, or 0 when {@code stddev^4} is 0 or
 *         fewer than two values are present (the formula is undefined there)
 */
private static double findKurtosis(Instances instances, double mean, double stddev, int attrib) {
    final double S4 = Math.pow(stddev, 4), YBAR = mean;
    if (S4 == 0) {
        return 0;
    }

    final int COUNT = instances.numInstances();
    double sum = 0.0;
    int n = 0;
    for (int i = 0; i < COUNT; i++) {
        Instance instance = instances.instance(i);
        if (!instance.isMissing(attrib)) {
            n++;
            sum += Math.pow(instance.value(attrib) - YBAR, 4);
        }
    }

    // n - 1 would be 0 or -1 here, giving Infinity/NaN or a spurious -3.
    if (n < 2) {
        return 0;
    }
    return (sum / ((n - 1) * S4)) - 3;
}

From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java

License:Open Source License

/**
 * Computes {@code sum((x - mean)^3) / ((n - 1) * stddev^3)} over the
 * non-missing values of one attribute, where {@code n} is the number of
 * non-missing values.
 *
 * @param instances the dataset to read values from
 * @param mean the mean to centre the values on
 * @param stddev the standard deviation used for scaling
 * @param attrib index of the attribute to evaluate
 * @return the value of the formula above, or 0 when {@code stddev^3} is 0 or
 *         fewer than two values are present (the formula is undefined there)
 */
private static double findSkewness(Instances instances, double mean, double stddev, int attrib) {
    final double S3 = Math.pow(stddev, 3), YBAR = mean;
    if (S3 == 0) {
        return 0;
    }

    final int COUNT = instances.numInstances();
    double sum = 0.0;
    int n = 0;
    for (int i = 0; i < COUNT; i++) {
        Instance instance = instances.instance(i);
        if (!instance.isMissing(attrib)) {
            n++;
            sum += Math.pow(instance.value(attrib) - YBAR, 3);
        }
    }

    // n - 1 would be 0 or -1 here, giving Infinity/NaN or a sign-flipped result.
    if (n < 2) {
        return 0;
    }
    return (sum / ((n - 1) * S3));
}