List of usage examples for weka.core.Instance.classValue()
public double classValue();
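classValue() returns the value of the class attribute as a double: for a nominal class it is the zero-based index of the class label, for a numeric class it is the value itself. Calling it on an instance whose dataset has no class index set throws an UnassignedClassException. Before the examples below, a minimal standalone sketch (the file name iris.arff is illustrative; this assumes the Weka 3.5/3.6-era API that the snippets on this page use):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        Instance first = data.firstInstance();
        double cv = first.classValue();
        if (data.classAttribute().isNominal()) {
            // Map the class index back to the label text.
            System.out.println((int) cv + " = " + data.classAttribute().value((int) cv));
        } else {
            System.out.println("numeric class value: " + cv);
        }
    }
}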
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Evaluates the classifier on a single instance and records the
 * prediction (if the class is nominal).
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if the model could not be evaluated
 *           successfully or the data contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception {
    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
        if (m_Predictions == null) {
            m_Predictions = new FastVector();
        }
        double[] dist = classifier.distributionForInstance(classMissing);
        pred = Utils.maxIndex(dist);
        if (dist[(int) pred] <= 0) {
            pred = Instance.missingValue();
        }
        updateStatsForClassifier(dist, instance);
        m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight()));
    } else {
        pred = classifier.classifyInstance(classMissing);
        updateStatsForPredictor(pred, instance);
    }
    return pred;
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Store the prediction made by the classifier as a string.
 *
 * @param classifier the classifier to use
 * @param inst the instance to generate text from
 * @param instNum the index in the dataset
 * @param attributesToOutput the indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          classes, not just the predicted value
 * @return the prediction as a String
 * @throws Exception if something goes wrong
 * @see #printClassifications(Classifier, Instances, String, int, Range, boolean)
 */
protected static String predictionText(Classifier classifier, Instance inst, int instNum,
        Range attributesToOutput, boolean printDistribution) throws Exception {
    StringBuffer result = new StringBuffer();
    int width = 10;
    int prec = 3;

    Instance withMissing = (Instance) inst.copy();
    withMissing.setDataset(inst.dataset());
    withMissing.setMissing(withMissing.classIndex());
    double predValue = classifier.classifyInstance(withMissing);

    // index
    result.append(Utils.padLeft("" + (instNum + 1), 6));

    if (inst.dataset().classAttribute().isNumeric()) {
        // actual
        if (inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(inst.classValue(), width, prec));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue, width, prec));
        // error
        if (Instance.isMissingValue(predValue) || inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec));
    } else {
        // actual
        result.append(" " + Utils.padLeft(
                ((int) inst.classValue() + 1) + ":" + inst.toString(inst.classIndex()), width));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.padLeft(
                    ((int) predValue + 1) + ":" + inst.dataset().classAttribute().value((int) predValue),
                    width));
        // error?
        if (!Instance.isMissingValue(predValue) && !inst.classIsMissing()
                && ((int) predValue + 1 != (int) inst.classValue() + 1))
            result.append(" " + " + ");
        else
            result.append(" " + "   ");
        // prediction/distribution
        if (printDistribution) {
            if (Instance.isMissingValue(predValue)) {
                result.append(" " + "?");
            } else {
                result.append(" ");
                double[] dist = classifier.distributionForInstance(withMissing);
                for (int n = 0; n < dist.length; n++) {
                    if (n > 0)
                        result.append(",");
                    if (n == (int) predValue)
                        result.append("*");
                    result.append(Utils.doubleToString(dist[n], prec));
                }
            }
        } else {
            if (Instance.isMissingValue(predValue))
                result.append(" " + "?");
            else
                result.append(" " + Utils.doubleToString(
                        classifier.distributionForInstance(withMissing)[(int) predValue], prec));
        }
    }

    // attributes
    result.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");

    return result.toString();
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Updates all the statistics about a classifier's performance for
 * the current test instance.
 *
 * @param predictedDistribution the probabilities assigned to each class
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {
    int actualClass = (int) instance.classValue();

    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());

        // Determine the predicted class (doesn't detect multiple
        // classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }

        m_WithClass += instance.weight();

        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass, actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
            } else {
                m_TotalCost += instance.weight() * m_CostMatrix.getElement(actualClass, predictedClass, instance);
            }
        }

        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }

        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());

        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        m_MissingClass += instance.weight();
    }
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Updates all the statistics about a predictor's performance for
 * the current test instance.
 *
 * @param predictedValue the numeric value the classifier predicts
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        // Update stats
        m_WithClass += instance.weight();
        if (Instance.isMissingValue(predictedValue)) {
            m_Unclassified += instance.weight();
            return;
        }
        m_SumClass += instance.weight() * instance.classValue();
        m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
        m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
        m_SumPredicted += instance.weight() * predictedValue;
        m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

        if (m_ErrorEstimator == null) {
            setNumericPriorsFromBuffer();
        }
        double predictedProb = Math.max(
                m_ErrorEstimator.getProbability(predictedValue - instance.classValue()), MIN_SF_PROB);
        double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB);

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight());

        updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()),
                instance.weight());
    } else {
        m_MissingClass += instance.weight();
    }
}
From source file:de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License:Apache License
/**
 * <p>
 * Determines the nearest unlike neighbor of an instance.
 * </p>
 *
 * @param instance
 *            instance to which the nearest unlike neighbor is determined
 * @param data
 *            data where the nearest unlike neighbor is determined from
 * @return nearest unlike instance
 */
public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = null;

    double[] instanceVector = new double[data.numAttributes() - 1];
    int tmp = 0;
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            // note the increment: without it every numeric value would
            // overwrite slot 0 of the vector
            instanceVector[tmp++] = instance.value(j);
        }
    }

    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < data.numInstances(); i++) {
        if (instance.classValue() != data.instance(i).classValue()) {
            double[] otherVector = new double[data.numAttributes() - 1];
            tmp = 0;
            for (int j = 0; j < data.numAttributes(); j++) {
                if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
                    otherVector[tmp++] = data.instance(i).value(j);
                }
            }
            // compute the distance once instead of twice
            double distance = MathArrays.distance(instanceVector, otherVector);
            if (distance < minDistance) {
                minDistance = distance;
                nearestUnlikeNeighbor = data.instance(i);
            }
        }
    }
    return nearestUnlikeNeighbor;
}
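Here classValue() acts purely as a label comparator: the nearest unlike neighbor is the closest instance carrying a different class. A hedged driver sketch (the names morph and data are illustrative, not from the MORPH source, and the no-argument constructor is assumed):

// Hypothetical usage, assuming `data` is a labelled Instances
// object with its class index already set.
MORPH morph = new MORPH();
for (int i = 0; i < data.numInstances(); i++) {
    Instance inst = data.instance(i);
    Instance nun = morph.getNearestUnlikeNeighbor(inst, data);
    if (nun != null) {
        // By construction, the two labels differ.
        assert inst.classValue() != nun.classValue();
    }
}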
From source file:de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java
License:Apache License
@Override
public void apply(Instances testdata, Instances traindata) {
    Instances newDataSet = new Instances(traindata);
    traindata.delete();

    HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>();

    // This is to add all data where the first occurrence of the file has a bug
    ArrayList<Double> firstOccurrenceArtifactNames = new ArrayList<Double>();

    // Sort dataset (StateID is connected to the date of commit: a lower StateID
    // means an earlier commit than a higher StateID)
    Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID");
    newDataSet.sort(wekaAttribute);

    /*
     * Logical summary: If there is an instance that doesn't have a bug, put it
     * into the hashmap (only unique values in there).
     *
     * If there is an instance that has a bug, look up whether it is already in
     * the hashmap (this means: it did not have a bug before!). If so, add it to
     * the new dataset and remove it from the hashmap, so that new changes from
     * "nonBug" -> "bug" for this file can be found.
     *
     * If the instance has a bug and is not in the hashmap (this means: the file
     * has a bug at its first occurrence, or this file only has bugs and no
     * instance without a bug), then (if it is not in the arrayList above) add it
     * to the new dataset. This way it is possible to get the first occurrence of
     * a file which has a bug.
     */
    for (int i = 0; i < newDataSet.numInstances(); i++) {
        Instance wekaInstance = newDataSet.instance(i);

        double newBugLabel = wekaInstance.classValue();
        Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name");
        Double artifactName = wekaInstance.value(wekaArtifactName);

        if (newBugLabel == 0.0) {
            // both 0.0 branches of the original if/else chain were identical,
            // so they collapse into a single put
            artifactNames.put(artifactName, wekaInstance);
        } else if (newBugLabel == 1.0 && artifactNames.containsKey(artifactName)) {
            traindata.add(wekaInstance);
            artifactNames.remove(artifactName);
        } else if (newBugLabel == 1.0 && !artifactNames.containsKey(artifactName)) {
            if (!firstOccurrenceArtifactNames.contains(artifactName)) {
                traindata.add(wekaInstance);
                firstOccurrenceArtifactNames.add(artifactName);
            }
        }
    }

    // If we have a file that never had a bug (that is, when it is NOT in the
    // newly created dataset but IS in the HashMap from above), add it to
    // the new dataset
    double[] artifactNamesInNewDataSet = traindata.attributeToDoubleArray(0);
    HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames);

    for (Double artifactName : artifactNames.keySet()) {
        for (int i = 0; i < artifactNamesInNewDataSet.length; i++) {
            if (artifactNamesInNewDataSet[i] == artifactName) {
                artifactNamesCopy.remove(artifactName);
            }
        }
    }

    for (Double artifact : artifactNamesCopy.keySet()) {
        traindata.add(artifactNamesCopy.get(artifact));
    }
}
From source file:decisiontree.MyC45.java
/**
 * Method for building a C4.5 tree.
 *
 * @param instances the training data
 * @exception Exception if the decision tree can't be built successfully
 */
private void makeTree(Instances instances) throws Exception {
    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[instances.numClasses()];
        return;
    }

    // Compute attribute with maximum gain ratio.
    double[] gainRatios = new double[instances.numAttributes()];
    Enumeration attrEnum = instances.enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (attr.isNominal()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr);
        } else if (attr.isNumeric()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr, computeThreshold(instances, attr));
        }
    }
    m_Attribute = instances.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if gain ratio is zero.
    // Otherwise create successors.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[instances.numClasses()];
        Enumeration instEnum = instances.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            // classValue() indexes the class distribution at this leaf
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = instances.classAttribute();
    } else {
        Instances[] splitData = null;
        int child = 0;
        if (m_Attribute.isNominal()) {
            child = m_Attribute.numValues();
            splitData = splitData(instances, m_Attribute);
        } else if (m_Attribute.isNumeric()) {
            child = 2;
            splitData = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute));
        }
        m_Successors = new MyC45[child];
        for (int j = 0; j < child; j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
From source file:decisiontree.MyID3.java
private void makeTree(Instances data) {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        splitAttr = null;
        leafValue = Double.NaN;
        leafDist = new double[data.numClasses()];
        return;
    }

    // If all instances share one class value, make this node a leaf.
    if (data.numDistinctValues(data.classIndex()) == 1) {
        leafValue = data.firstInstance().classValue();
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    splitAttr = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[splitAttr.index()], 0)) {
        splitAttr = null;
        leafDist = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            leafDist[(int) inst.classValue()]++;
        }
        normalize(leafDist);
        leafValue = Utils.maxIndex(leafDist);
        classAttr = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, splitAttr);
        child = new MyID3[splitAttr.numValues()];
        for (int j = 0; j < splitAttr.numValues(); j++) {
            child[j] = new MyID3();
            child[j].makeTree(splitData[j]);
        }
    }
}
From source file:decisiontree.MyID3.java
private double computeEntropy(Instances data) {
    int numClasses = data.numClasses();
    int[] classCount = new int[numClasses];

    // Bucket the instances by class value; classValue() returns the
    // index of the nominal class label as a double.
    ArrayList<Double> classValues = new ArrayList<>();
    Enumeration<Instance> instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance instance = instEnum.nextElement();
        double classValue = instance.classValue();
        if (!classValues.contains(classValue)) {
            classValues.add(classValue);
        }
        int index = classValues.indexOf(classValue);
        classCount[index]++;
    }

    // Shannon entropy: H = -sum(p * log2(p)) over the observed classes.
    double entropy = 0.0;
    for (Double value : classValues) {
        int index = classValues.indexOf(value);
        if (classCount[index] > 0) {
            double temp = (double) classCount[index] / data.numInstances();
            entropy -= temp * Utils.log2(temp);
        }
    }
    return entropy;
}
From source file:dewaweebtreeclassifier.Sujeong.java
public double computeEntropy(Instances data) {
    double[] nClass = new double[data.numClasses()];
    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        nClass[(int) instance.classValue()]++;
    }

    double entropy = 0.0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (nClass[i] > 0) {
            double ratio = nClass[i] / data.numInstances();
            entropy -= (ratio * Utils.log2(ratio));
        }
    }
    return entropy;
}
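Both entropy helpers above compute the same Shannon entropy, H = -sum(p_i * log2(p_i)), using classValue() as a bucket index into the class counts. A quick hand check (a standalone sketch, not from either source file; the 9/5 split is the classic play-tennis example, chosen only for illustration):

// Two classes with 9 and 5 instances out of 14 total.
double[] nClass = { 9.0, 5.0 };
double total = 14.0;
double entropy = 0.0;
for (double count : nClass) {
    if (count > 0) {
        double ratio = count / total;
        entropy -= ratio * Utils.log2(ratio); // weka.core.Utils
    }
}
System.out.println(entropy); // prints approximately 0.940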