Example usage of weka.core.Instance.isMissing

List of usage examples for weka.core.Instance.isMissing

Introduction

On this page you can find example usages of weka.core.Instance.isMissing.

Prototype

public boolean isMissing(Attribute att);

Source Link

Document

Tests if a specific value is "missing".

Usage

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Searches for the best binary split of {@code dataSet} on the numeric
 * attribute at {@code attribIndex}.
 *
 * <p>Candidate split points are the positions where two neighbouring
 * instances differ in attribute value by more than 1e-5. A candidate is only
 * scored when both resulting subsets carry at least {@code minSplit} weight;
 * the candidate with the highest gain ratio wins (ties go to the later one).
 * On success the fields {@code numSubsets}, {@code splitPointValue},
 * {@code classDist}, {@code infoGain} and {@code gainRatio} describe the
 * chosen split. If there are too few known values or no admissible
 * candidate, the method returns early without setting {@code numSubsets}.
 *
 * <p>NOTE(review): the scan stops at the first instance with a missing value
 * and compares neighbouring instances, so it presumably expects
 * {@code dataSet} to be sorted on this attribute with missing values last -
 * confirm against the caller.
 *
 * @param dataSet the instances to split
 * @throws Exception declared by the signature; presumably raised by the
 *         distribution helpers - confirm
 */
private void handleNumericAttribute(Instances dataSet) throws Exception {

    int firstMiss;
    int next = 1;
    int last = 0;
    int splitIndex = -1;
    double currentInfoGain;
    double currentGainRatio;
    double minSplit;
    Instance instance;
    int i;
    boolean instanceMissing = false;

    // Current attribute is a numeric attribute, so the split is binary:
    // start with two bags and put every known-value instance into bag 1.
    classDist = new ClassDistribution(2, dataSet.numClasses());

    // Only instances with known values are relevant. Stop at the first
    // instance whose value is missing; i counts the instances before it.
    Enumeration instanceEnum = dataSet.enumerateInstances();
    i = 0;
    while ((instanceEnum.hasMoreElements() && (!instanceMissing))) {
        instance = (Instance) instanceEnum.nextElement();
        if (instance.isMissing(attribIndex)) {
            instanceMissing = true;
        } else {
            classDist.addInstance(1, instance);
            i++;
        }
    }
    // Index of the first instance with a missing value (or the number of
    // instances if none is missing).
    firstMiss = i;

    // Compute minimum number of instances required in each subset:
    // 10% of the average weight per class, clamped to [minInstances, 25].
    minSplit = 0.1 * (classDist.getTotalWeight()) / ((double) dataSet.numClasses());
    if (minSplit <= minInstances) {
        minSplit = minInstances;
    } else if (minSplit > 25) {
        minSplit = 25;
    }
    // Enough instances with known values? Both subsets need minSplit each.
    if ((double) firstMiss < 2 * minSplit) {
        return;
    }
    // Compute values of criteria for all possible split indices.
    while (next < firstMiss) {
        // A split is only possible between two distinguishable values.
        if (dataSet.instance(next - 1).value(attribIndex) + 1e-5 < dataSet.instance(next).value(attribIndex)) {

            // Move class values for all instances up to the next possible
            // split point from bag 1 to bag 0.
            classDist.moveInstancesWithRange(1, 0, dataSet, last, next);

            // Check if enough instances in each subset and compute
            // values for criteria.
            if ((classDist.w_perSubdataset[0] >= minSplit) && (classDist.w_perSubdataset[1] >= minSplit)) {
                currentInfoGain = classDist.calculateInfoGain(totalWeights);
                currentGainRatio = classDist.calculateGainRatio(totalWeights);
                // ">=" means that among equally good candidates the last one wins.
                if (currentGainRatio >= gainRatio) {
                    infoGain = currentInfoGain;
                    gainRatio = currentGainRatio;
                    splitIndex = next - 1;
                }
                numOfSplitPoints++;
            }
            last = next;
        }
        next++;
    }

    // Was there any useful split?
    if (numOfSplitPoints == 0) {
        return;
    }
    // Compute modified information gain for best split: subtract
    // log2(number of scored candidates) / totalWeights.
    infoGain = infoGain - (classDist.log2(numOfSplitPoints) / totalWeights);
    if (infoGain > 0) {
        // Set instance variables' values to values for best split.
        // The split point is the midpoint between the two neighbouring values.
        numSubsets = 2;
        splitPointValue = (dataSet.instance(splitIndex + 1).value(attribIndex)
                + dataSet.instance(splitIndex).value(attribIndex)) / 2;

        // In case we have a numerical precision problem we need to choose the
        // smaller value
        if (splitPointValue == dataSet.instance(splitIndex + 1).value(attribIndex)) {
            splitPointValue = dataSet.instance(splitIndex).value(attribIndex);
        }
        // Restore distribution for best split: [0, splitIndex] goes to bag 0,
        // the remaining known-value instances go to bag 1.
        classDist = new ClassDistribution(2, dataSet.numClasses());
        classDist.addRange(0, dataSet, 0, splitIndex + 1);
        classDist.addRange(1, dataSet, splitIndex + 1, firstMiss);
        // Compute modified gain ratio for best split.
        // NOTE(review): calculateGainRatio receives infoGain here but
        // totalWeights inside the search loop - confirm which argument the
        // helper expects.
        gainRatio = classDist.calculateGainRatio(infoGain);
    }
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Moves the split point onto a value that actually occurs in the data: the
 * largest known value of the split attribute in {@code allInstances} that
 * does not exceed the current {@code splitPointValue}.
 *
 * <p>Nothing happens unless the split attribute is numeric and the model has
 * more than one subset. If no instance has a value at or below the current
 * split point, {@code splitPointValue} ends up as {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the instances whose attribute values are scanned
 */
public final void setSplitPoint(Instances allInstances) {
    if (!allInstances.attribute(attribIndex).isNumeric() || numSubsets <= 1) {
        return;
    }

    double largestBelow = -Double.MAX_VALUE;
    for (Enumeration e = allInstances.enumerateInstances(); e.hasMoreElements();) {
        Instance current = (Instance) e.nextElement();
        if (current.isMissing(attribIndex)) {
            // Instances without a value cannot contribute a candidate.
            continue;
        }
        double value = current.value(attribIndex);
        if (value > largestBelow && value <= splitPointValue) {
            largestBelow = value;
        }
    }
    splitPointValue = largestBelow;
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Returns the fraction of the training weight that fell into each subset,
 * for use when {@code instance} has no value for the split attribute.
 *
 * @param instance the instance to be distributed
 * @return one weight per subset if the split attribute is missing in
 *         {@code instance}; {@code null} if the value is known
 */
@Override
public double[] getWeights(Instance instance) {
    if (!instance.isMissing(attribIndex)) {
        return null;
    }

    double[] fractions = new double[numSubsets];
    for (int subset = 0; subset < numSubsets; subset++) {
        fractions[subset] = classDist.w_perSubdataset[subset] / classDist.getTotalWeight();
    }
    return fractions;
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Determines which subset {@code instance} belongs to under this split.
 *
 * @param instance the instance to place
 * @return {@code -1} if the split attribute is missing; for a nominal
 *         attribute the value index; for any other attribute {@code 0} when
 *         the value is at most {@code splitPointValue}, otherwise {@code 1}
 * @throws Exception declared by the overridden signature
 */
@Override
public int getSubsetIndex(Instance instance) throws Exception {
    if (instance.isMissing(attribIndex)) {
        return -1;
    }
    if (instance.attribute(attribIndex).isNominal()) {
        return (int) instance.value(attribIndex);
    }
    return instance.value(attribIndex) <= splitPointValue ? 0 : 1;
}

From source file:myclassifier.Util.java

/**
 * Computes the information gain of splitting {@code instances} on
 * {@code attribute}, scaled by the fraction of instances whose value for
 * that attribute is known.
 *
 * <p>The gain is the entropy of the whole set minus the size-weighted entropy
 * of each non-empty subset produced by {@code splitData}. The result is then
 * multiplied by {@code (total - missing) / total}.
 *
 * @param instances the data to evaluate
 * @param attribute the attribute to split on
 * @return the scaled information gain; {@code 0} for an empty dataset
 */
public static double calculateIG(Instances instances, Attribute attribute) {
    final int total = instances.numInstances();
    if (total == 0) {
        // Nothing to split; also avoids dividing by zero below.
        return 0.0;
    }

    double gain = calculateE(instances);
    Instances[] subsets = splitData(instances, attribute);
    for (int j = 0; j < attribute.numValues(); j++) {
        int subsetSize = subsets[j].numInstances();
        if (subsetSize > 0) {
            gain -= ((double) subsetSize / (double) total) * calculateE(subsets[j]);
        }
    }

    int missingCount = 0;
    for (int i = 0; i < total; i++) {
        if (instances.instance(i).isMissing(attribute)) {
            missingCount++;
        }
    }

    // Scale by the known-value fraction. The subtraction must happen before
    // the division, and the division must be floating point: the previous
    // "total - missingCount / total" used integer division and multiplied
    // the gain by (almost) the dataset size instead of a fraction in [0, 1].
    return gain * ((double) (total - missingCount) / (double) total);
}

From source file:myclusterer.MyKMeans.java

/**
 * Builds the centroid of a cluster from its member instances.
 *
 * <p>For a numeric attribute the centroid holds the weighted mean of the
 * known values, or the missing-value marker when no member has a known
 * value. For every other attribute it holds the value index with the
 * largest accumulated weight, or the missing-value marker when that weight
 * is smaller than the weight of the members missing the attribute.
 *
 * @param members the instances assigned to the cluster
 * @return a new instance of weight 1.0 holding the centroid values
 */
private Instance createCentroid(Instances members) {
    final int attrCount = members.numAttributes();
    double[] centroid = new double[attrCount];
    double[][] valueWeights = new double[attrCount][];
    double[] missingWeight = new double[attrCount];
    double[] knownWeight = new double[attrCount];

    // Only nominal attributes get a per-value weight histogram.
    for (int a = 0; a < attrCount; a++) {
        if (members.attribute(a).isNominal()) {
            valueWeights[a] = new double[members.attribute(a).numValues()];
        }
    }

    // Accumulate weighted sums (numeric) and weighted counts (otherwise).
    for (int m = 0; m < members.numInstances(); ++m) {
        Instance member = members.instance(m);
        for (int a = 0; a < attrCount; a++) {
            if (member.isMissing(a)) {
                missingWeight[a] += member.weight();
                continue;
            }
            knownWeight[a] += member.weight();
            if (members.attribute(a).isNumeric()) {
                centroid[a] += member.weight() * member.value(a);
            } else {
                valueWeights[a][(int) member.value(a)] += member.weight();
            }
        }
    }

    for (int a = 0; a < attrCount; a++) {
        if (members.attribute(a).isNumeric()) {
            centroid[a] = knownWeight[a] > 0
                    ? centroid[a] / knownWeight[a]
                    : Instance.missingValue();
            continue;
        }

        double heaviest = -Double.MAX_VALUE;
        double heaviestIndex = -1;
        for (int v = 0; v < valueWeights[a].length; v++) {
            if (valueWeights[a][v] > heaviest) {
                heaviest = valueWeights[a][v];
                heaviestIndex = v;
            }
            // Assigned on every pass so the last pass decides; an attribute
            // with no values keeps the initial 0.
            if (heaviest < missingWeight[a]) {
                centroid[a] = Instance.missingValue();
            } else {
                centroid[a] = heaviestIndex;
            }
        }
    }
    return new Instance(1.0, centroid);
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Prepares the training data and builds the tree.
 *
 * <p>Works on a copy of {@code data} with class-less instances removed. For
 * each numeric attribute the data is sorted on that attribute; wherever two
 * neighbouring instances differ in class, the midpoint of their values is
 * passed to {@code convertInstances} unless an attribute named
 * {@code "<attribute name> <midpoint>"} already exists. Missing values are
 * then replaced by the attribute mean (0 if the mean is NaN). For every other
 * attribute, missing values are replaced by the most frequent value index.
 * Finally the tree is built with {@code makeMyC45Tree}.
 *
 * <p>NOTE(review): split candidates for a numeric attribute are derived
 * before its missing values are replaced - confirm this order is intended.
 *
 * @param data the training data
 * @throws Exception if the capability check fails, or - presumably - if a
 *         helper called here fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        Attribute attr = (Attribute) enumAtt.nextElement();
        if (attr.isNumeric()) {
            // TODO: split nominal
            data.sort(attr);
            for (int i = 0; i < data.numInstances() - 1; i++) {
                if (data.instance(i).classValue() != data.instance(i + 1).classValue()) {
                    // Candidate threshold: midpoint between the two neighbours.
                    double threshold = (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2;
                    // Skip thresholds for which a derived attribute already exists.
                    if (data.attribute(attr.name() + " " + threshold) == null) {
                        data = convertInstances(data, attr, threshold);
                    }
                }
            }

            // Missing-value handling: replace missing values with the mean.
            AttributeStats attributeStats = data.attributeStats(attr.index());
            double mean = attributeStats.numericStats.mean;
            if (Double.isNaN(mean)) {
                mean = 0;
            }
            Enumeration instEnumerate = data.enumerateInstances();
            while (instEnumerate.hasMoreElements()) {
                Instance instance = (Instance) instEnumerate.nextElement();
                if (instance.isMissing(attr.index())) {
                    instance.setValue(attr.index(), mean);
                }
            }
        } else {
            // Missing-value handling: find the most frequent value index
            // (the first one wins on ties).
            AttributeStats attributeStats = data.attributeStats(attr.index());
            int maxIndex = 0;
            for (int i = 1; i < attr.numValues(); i++) {
                if (attributeStats.nominalCounts[maxIndex] < attributeStats.nominalCounts[i]) {
                    maxIndex = i;
                }
            }
            // Replace missing values with the most frequent value index.
            Enumeration instEnumerate = data.enumerateInstances();
            while (instEnumerate.hasMoreElements()) {
                Instance instance = (Instance) instEnumerate.nextElement();
                if (instance.isMissing(attr.index())) {
                    instance.setValue(attr.index(), maxIndex);
                }
            }
        }
    }
    makeMyC45Tree(data);
}

From source file:naivebayes.NBTubesAI.java

/**
 * Classifies {@code instance} with the Naive Bayes model.
 *
 * <p>Each class score starts as {@code classCount / numInstance} and is
 * multiplied by the stored likelihood of every non-missing attribute value.
 * If a likelihood lookup fails with a {@code NullPointerException} the score
 * is set to 0 and the remaining attributes are still processed.
 *
 * @param instance the instance to classify
 * @return the index of the class with the highest score; index 0 if no
 *         score is greater than 0
 * @throws Exception declared by the overridden signature
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    final int numClasses = instance.classAttribute().numValues();
    double[] scores = new double[numClasses];

    // Compute the score of every class.
    for (int c = 0; c < numClasses; c++) {

        // Naive Bayes formula: prior times the per-attribute likelihoods.
        scores[c] = (double) classCount.get(c + 0.0) / numInstance;

        Enumeration<Attribute> attributes = instance.enumerateAttributes();
        while (attributes.hasMoreElements()) {
            Attribute attribute = attributes.nextElement();
            if (instance.isMissing(attribute)) {
                continue;
            }
            try {
                scores[c] *= distribution.get(attribute.name()).get(instance.stringValue(attribute)).get(c + 0.0);
            } catch (NullPointerException e) {
                // A lookup in the model tables came back empty.
                scores[c] = 0;
            }
        }
    }

    // Pick the first class with the strictly highest score.
    int bestClass = 0;
    double bestScore = 0;
    for (int c = 0; c < numClasses; c++) {
        if (bestScore < scores[c]) {
            bestClass = c;
            bestScore = scores[c];
        }
    }
    return bestClass;
}

From source file:naivebayes.NBTubesAI.java

/**
 * Computes one Naive Bayes score per class for {@code instance}.
 *
 * <p>Each score starts as {@code classCount / numInstance} and is multiplied
 * by the stored likelihood of every non-missing attribute value. An attribute
 * whose likelihood lookup fails with a {@code NullPointerException} is
 * skipped. The scores are returned as computed; they are not normalised to
 * sum to 1.
 *
 * <p>NOTE(review): {@code classifyInstance} in the same source file sets the
 * score to 0 on a failed lookup instead of skipping the attribute - confirm
 * which behaviour is intended.
 *
 * @param instance the instance to score
 * @return an array with one score per class value
 * @throws Exception declared by the overridden signature
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    final int numClasses = instance.classAttribute().numValues();
    double[] scores = new double[numClasses];

    // Compute the score of every class.
    for (int c = 0; c < numClasses; c++) {

        // Naive Bayes formula: prior times the per-attribute likelihoods.
        scores[c] = (double) classCount.get(c + 0.0) / numInstance;

        Enumeration<Attribute> attributes = instance.enumerateAttributes();
        while (attributes.hasMoreElements()) {
            Attribute attribute = attributes.nextElement();
            if (instance.isMissing(attribute)) {
                continue;
            }
            try {
                scores[c] *= distribution.get(attribute.name()).get(instance.stringValue(attribute)).get(c + 0.0);
            } catch (NullPointerException ignored) {
                // A lookup in the model tables came back empty: leave the
                // score untouched and move on to the next attribute.
            }
        }
    }

    return scores;
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Classifies a given test instance using the decision tree.
 * /*from   w w w  .  ja  va 2  s.c om*/
 * @param instance
 *            the instance to be classified
 * @return the classification
 */
public double classifyInstance(Instance instance) {
    if (m_Attribute == null) {
        return m_ClassValue;
    } else if (instance.isMissing(m_Attribute)) {
        try {
            // Use superclass implementation, which uses distributionForInstance.
            return super.classifyInstance(instance);
        } catch (Exception x) {
            x.printStackTrace();
            Assert.UNREACHABLE();
            return 0.;
        }
    } else {
        return m_Successors[(int) instance.value(m_Attribute)].classifyInstance(instance);
    }
}