Example usage for weka.core.Instance.classValue()

Introduction

On this page you can find example usages of the weka.core.Instance method classValue().

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Searches for the best "one value versus the rest" split on a nominal
 * attribute when the class is nominal. Each candidate split is scored with
 * {@code ContingencyTables.entropyConditionedOnRows}; the lowest score wins.
 *
 * <p>Side effects: {@code m_SplitPoint} receives the index of the winning
 * attribute value, and {@code m_Distribution} is replaced by the class
 * distribution of the winning split (row 0: chosen value, row 1: all other
 * values, row 2: instances with a missing attribute value, or the overall
 * class totals when no instance is missing the attribute).
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNominal(int index) throws Exception {

    final int valueCount = m_Instances.attribute(index).numValues();
    final int classCount = m_Instances.numClasses();
    final int instanceCount = m_Instances.numInstances();

    // One row per attribute value plus a trailing row for "missing".
    double[][] weightTable = new double[valueCount + 1][classCount];
    double[] classTotals = new double[classCount];
    double[][] winner = new double[3][classCount];
    double lowestScore = Double.MAX_VALUE;
    int missingCount = 0;

    // Accumulate instance weights per (attribute value, class) cell.
    for (int n = 0; n < instanceCount; n++) {
        Instance current = m_Instances.instance(n);
        int row;
        if (current.isMissing(index)) {
            missingCount++;
            row = valueCount;
        } else {
            row = (int) current.value(index);
        }
        weightTable[row][(int) current.classValue()] += current.weight();
    }

    // Per-class totals over the non-missing rows only.
    for (int v = 0; v < valueCount; v++) {
        double[] row = weightTable[v];
        for (int c = 0; c < classCount; c++) {
            classTotals[c] += row[c];
        }
    }

    // The "missing" branch is the same for every candidate split.
    System.arraycopy(weightTable[valueCount], 0, m_Distribution[2], 0, classCount);

    // Try each attribute value as the split value and keep the best one.
    for (int v = 0; v < valueCount; v++) {
        double[] row = weightTable[v];
        for (int c = 0; c < classCount; c++) {
            m_Distribution[0][c] = row[c];
            m_Distribution[1][c] = classTotals[c] - row[c];
        }
        double score = ContingencyTables.entropyConditionedOnRows(m_Distribution);
        if (score < lowestScore) {
            lowestScore = score;
            m_SplitPoint = v;
            for (int branch = 0; branch < 3; branch++) {
                System.arraycopy(m_Distribution[branch], 0, winner[branch], 0, classCount);
            }
        }
    }

    // Without missing values, the third branch falls back to the overall totals.
    if (missingCount == 0) {
        System.arraycopy(classTotals, 0, winner[2], 0, classCount);
    }

    m_Distribution = winner;
    return lowestScore;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for nominal attribute and numeric class
 * and returns value./*from   w  ww  .  j a v a 2 s. c om*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    double[] sumsSquaresPerValue = new double[m_Instances.attribute(index).numValues()],
            sumsPerValue = new double[m_Instances.attribute(index).numValues()],
            weightsPerValue = new double[m_Instances.attribute(index).numValues()];
    double totalSumSquaresW = 0, totalSumW = 0, totalSumOfWeightsW = 0, totalSumOfWeights = 0, totalSum = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (inst.isMissing(index)) {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
        } else {
            weightsPerValue[(int) inst.value(index)] += inst.weight();
            sumsPerValue[(int) inst.value(index)] += inst.classValue() * inst.weight();
            sumsSquaresPerValue[(int) inst.value(index)] += inst.classValue() * inst.classValue()
                    * inst.weight();
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Compute sum of counts without missing ones
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
        totalSumOfWeightsW += weightsPerValue[i];
        totalSumSquaresW += sumsSquaresPerValue[i];
        totalSumW += sumsPerValue[i];
    }

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {

        m_Distribution[0][0] = sumsPerValue[i];
        sumsSquares[0] = sumsSquaresPerValue[i];
        sumOfWeights[0] = weightsPerValue[i];
        m_Distribution[1][0] = totalSumW - sumsPerValue[i];
        sumsSquares[1] = totalSumSquaresW - sumsSquaresPerValue[i];
        sumOfWeights[1] = totalSumOfWeightsW - weightsPerValue[i];

        currVal = variance(m_Distribution, sumsSquares, sumOfWeights);

        if (currVal < bestVal) {
            bestVal = currVal;
            m_SplitPoint = (double) i;
            for (int j = 0; j < 3; j++) {
                if (sumOfWeights[j] > 0) {
                    bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                } else {
                    bestDist[j][0] = totalSum / totalSumOfWeights;
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and nominal class
 * and returns value./*from www  .  j  a v  a  2s. co m*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNominal(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sum = new double[m_Instances.numClasses()];
    double[][] bestDist = new double[3][m_Instances.numClasses()];

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][(int) inst.classValue()] += inst.weight();
        } else {
            m_Distribution[2][(int) inst.classValue()] += inst.weight();
            numMissing++;
        }
    }
    System.arraycopy(m_Distribution[1], 0, sum, 0, m_Instances.numClasses());

    // Save current distribution as best distribution
    for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
    }

    // Sort instances
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][(int) inst.classValue()] += inst.weight();
        m_Distribution[1][(int) inst.classValue()] -= inst.weight();
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
                }
            }
        }
    }

    // No missing values in training data.
    if (numMissing == 0) {
        System.arraycopy(sum, 0, bestDist[2], 0, m_Instances.numClasses());
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and numeric class
 * and returns value./*  www.ja va  2 s . c  om*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];
    double totalSum = 0, totalSumOfWeights = 0;

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][0] += inst.classValue() * inst.weight();
            sumsSquares[1] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[1] += inst.weight();
        } else {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
            numMissing++;
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Sort instances
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][0] += inst.classValue() * inst.weight();
        sumsSquares[0] += inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[0] += inst.weight();
        m_Distribution[1][0] -= inst.classValue() * inst.weight();
        sumsSquares[1] -= inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[1] -= inst.weight();
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = variance(m_Distribution, sumsSquares, sumOfWeights);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    if (sumOfWeights[j] > 0) {
                        bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                    } else {
                        bestDist[j][0] = totalSum / totalSumOfWeights;
                    }
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boostingPL.boosting.AdaBoost.java

License:Open Source License

/**
 * Runs boosting round {@code t}: trains a decision stump on the current
 * instance weights, derives the classifier weight from its error, and
 * re-weights every training instance.
 *
 * <p>Does nothing when {@code t >= numIterations}. Misclassified instances
 * have their weight divided by {@code 2 * e}, correctly classified ones by
 * {@code 2 * (1 - e)}, where {@code e} is the value returned by
 * {@code weightError(t)} (presumably the weighted training error of the
 * round-{@code t} classifier -- confirm against that method).
 *
 * @param t zero-based index of the boosting round
 * @throws Exception if the classifier cannot be built or evaluated, or if
 *         its error rate is 0.5 or higher
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        // Message now matches the guard, which also rejects exactly 0.5.
        throw new Exception("error rate >= 0.5");
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0: it would make the log below infinite
    }
    // Math.log is already the natural logarithm; no base conversion is needed.
    cweights[t] = 0.5 * Math.log((1 - e) / e);
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            // Misclassified: boost the weight.
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            // Correctly classified: shrink the weight.
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}

From source file:boostingPL.boosting.SAMME.java

License:Open Source License

/**
 * Runs boosting round {@code t} of multi-class boosting: trains a decision
 * stump on the current instance weights, derives the classifier weight
 * from its error, multiplies the weight of every misclassified instance by
 * {@code exp(classifier weight)}, and finally rescales all instance
 * weights so that they sum to one.
 *
 * <p>Does nothing when {@code t >= numIterations}. The error {@code e} is
 * the value returned by {@code weightError(t)} (presumably the weighted
 * training error of the round-{@code t} classifier -- confirm against that
 * method).
 *
 * @param t zero-based index of the boosting round
 * @throws Exception if the classifier cannot be built or evaluated, or if
 *         its error rate is at least {@code 1 - 1/numClasses}
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    // Highest error rate this method accepts for a round.
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        // Message now matches the guard, which also rejects e == maxe.
        throw new Exception("error rate >= " + maxe);
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0: it would make the log below infinite
    }
    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    // Boost the weight of every misclassified instance.
    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    // Renormalize so that the instance weights sum to one.
    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }

}

From source file:br.com.ufu.lsi.rebfnetwork.RBFRegressor.java

License:Open Source License

/**
 * Computes the squared error for a single instance: the square of the
 * difference between {@code getOutput(outputs)} and the instance's class
 * value.
 *
 * @param outputs values passed on to {@code getOutput}
 * @param inst the instance whose class value is the target
 * @return the squared difference between prediction and target
 */
protected double calculateError(double[] outputs, Instance inst) {

    final double residual = getOutput(outputs) - inst.classValue();
    return residual * residual;
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFRegressor.java

License:Open Source License

/**
 * Accumulates the gradient contribution of one instance for the weights
 * and bias of the output layer, and fills {@code deltaHidden} with the
 * error signal for the hidden units.
 *
 * <p>{@code deltaHidden} is always reset to zero first. If the difference
 * between {@code getOutput(outputs)} and the instance's class value lies
 * within {@code m_tolerance} of zero, nothing else is updated.
 *
 * @param grad gradient array, updated in place starting at {@code OFFSET_WEIGHTS}
 * @param inst the training instance
 * @param outputs hidden-unit outputs, one entry per unit
 * @param derivativesOutputs not used by this method
 * @param deltaHidden receives the per-unit error signal
 */
protected void updateGradient(double[] grad, Instance inst, double[] outputs, double[] derivativesOutputs,
        double[] deltaHidden) {

    // Clear any error signal left over from a previous call.
    Arrays.fill(deltaHidden, 0.0);

    // Prediction error of the output unit.
    final double residual = getOutput(outputs) - inst.classValue();

    // Skip the update entirely when the error is negligible.
    if (Math.abs(residual) <= m_tolerance) {
        return;
    }

    final int weightBase = OFFSET_WEIGHTS;

    // Error signal for each hidden unit, scaled by its output weight.
    for (int unit = 0; unit < m_numUnits; unit++) {
        deltaHidden[unit] += residual * m_RBFParameters[weightBase + unit];
    }

    // Gradient with respect to each output weight.
    for (int unit = 0; unit < m_numUnits; unit++) {
        grad[weightBase + unit] += residual * outputs[unit];
    }

    // Gradient with respect to the bias, stored right after the weights.
    grad[weightBase + m_numUnits] += residual;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Recursively builds an Id3 tree from the given data.
 *
 * <p>An empty data set yields a leaf with a missing class value and an
 * all-zero distribution. Otherwise the attribute with the largest
 * information gain is selected; if that gain is zero the node becomes a
 * leaf predicting the most frequent class, else one successor is built per
 * value of the selected attribute.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // No instance reached this node: leaf without a prediction.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Score every attribute by information gain and pick the best.
    double[] gainByAttribute = new double[data.numAttributes()];
    for (Enumeration attributes = data.enumerateAttributes(); attributes.hasMoreElements();) {
        Attribute candidate = (Attribute) attributes.nextElement();
        gainByAttribute[candidate.index()] = computeInfoGain(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainByAttribute));

    // Zero gain: nothing left to split on, so make a leaf.
    if (Utils.eq(gainByAttribute[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (Enumeration instances = data.enumerateInstances(); instances.hasMoreElements();) {
            Instance current = (Instance) instances.nextElement();
            m_Distribution[(int) current.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
        return;
    }

    // Positive gain: split on the chosen attribute and recurse into each subset.
    Instances[] subsets = splitData(data, m_Attribute);
    final int branchCount = m_Attribute.numValues();
    m_Successors = new Id3[branchCount];
    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
        m_Successors[branch] = new Id3();
        m_Successors[branch].makeTree(subsets[branch]);
    }
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Computes the entropy of a dataset's class distribution, in bits.
 *
 * <p>Works on raw class counts {@code c_j} with {@code n} instances in
 * total: the result is {@code log2(n) - (1/n) * sum_j c_j * log2(c_j)}.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    // Count how many instances fall into each class.
    double[] countPerClass = new double[data.numClasses()];
    for (Enumeration instances = data.enumerateInstances(); instances.hasMoreElements();) {
        Instance current = (Instance) instances.nextElement();
        countPerClass[(int) current.classValue()]++;
    }

    double entropy = 0;
    for (double count : countPerClass) {
        // Empty classes contribute nothing (and log2(0) is undefined).
        if (count > 0) {
            entropy -= count * Utils.log2(count);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}