Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

On this page you can find example usages of the weka.core.Instance method classValue().

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Searches for the best split on a nominal attribute when the class is
 * nominal as well.
 *
 * <p>Every attribute value is tried as a "this value versus all other
 * values" split. Each candidate is scored with
 * {@code ContingencyTables.entropyConditionedOnRows}; the candidate with
 * the lowest score wins. As a side effect {@code m_SplitPoint} is set to
 * the index of the winning value and {@code m_Distribution} is replaced by
 * the per-branch class weights of the winning split (branch 2 holds the
 * instances with a missing attribute value, or the overall class weights
 * when no value is missing).
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNominal(int index) throws Exception {

    final int valueCount = m_Instances.attribute(index).numValues();
    final int classCount = m_Instances.numClasses();

    // One row per attribute value; the extra last row collects the
    // instances whose attribute value is missing.
    double[][] weightTable = new double[valueCount + 1][classCount];
    int missingCount = 0;

    for (int n = 0; n < m_Instances.numInstances(); n++) {
        Instance current = m_Instances.instance(n);
        int row;
        if (current.isMissing(index)) {
            missingCount++;
            row = valueCount;
        } else {
            row = (int) current.value(index);
        }
        weightTable[row][(int) current.classValue()] += current.weight();
    }

    // Per-class totals over all rows with a known attribute value.
    double[] classTotals = new double[classCount];
    for (int v = 0; v < valueCount; v++) {
        for (int c = 0; c < classCount; c++) {
            classTotals[c] += weightTable[v][c];
        }
    }

    // The missing-value branch is identical for every candidate split.
    System.arraycopy(weightTable[valueCount], 0, m_Distribution[2], 0, classCount);

    double lowestEntropy = Double.MAX_VALUE;
    double[][] winner = new double[3][classCount];
    for (int v = 0; v < valueCount; v++) {
        // Branch 0: instances with value v; branch 1: everything else.
        for (int c = 0; c < classCount; c++) {
            m_Distribution[0][c] = weightTable[v][c];
            m_Distribution[1][c] = classTotals[c] - weightTable[v][c];
        }
        double candidate = ContingencyTables.entropyConditionedOnRows(m_Distribution);
        if (candidate < lowestEntropy) {
            lowestEntropy = candidate;
            m_SplitPoint = (double) v;
            for (int branch = 0; branch < 3; branch++) {
                System.arraycopy(m_Distribution[branch], 0, winner[branch], 0, classCount);
            }
        }
    }

    // Without missing values in the training data, the third branch
    // falls back to the overall class weights.
    if (missingCount == 0) {
        System.arraycopy(classTotals, 0, winner[2], 0, classCount);
    }

    m_Distribution = winner;
    return lowestEntropy;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for nominal attribute and numeric class
 * and returns value./*from   w  ww  .  j a v a 2 s. c om*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    double[] sumsSquaresPerValue = new double[m_Instances.attribute(index).numValues()],
            sumsPerValue = new double[m_Instances.attribute(index).numValues()],
            weightsPerValue = new double[m_Instances.attribute(index).numValues()];
    double totalSumSquaresW = 0, totalSumW = 0, totalSumOfWeightsW = 0, totalSumOfWeights = 0, totalSum = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (inst.isMissing(index)) {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
        } else {
            weightsPerValue[(int) inst.value(index)] += inst.weight();
            sumsPerValue[(int) inst.value(index)] += inst.classValue() * inst.weight();
            sumsSquaresPerValue[(int) inst.value(index)] += inst.classValue() * inst.classValue()
                    * inst.weight();
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Compute sum of counts without missing ones
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
        totalSumOfWeightsW += weightsPerValue[i];
        totalSumSquaresW += sumsSquaresPerValue[i];
        totalSumW += sumsPerValue[i];
    }

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {

        m_Distribution[0][0] = sumsPerValue[i];
        sumsSquares[0] = sumsSquaresPerValue[i];
        sumOfWeights[0] = weightsPerValue[i];
        m_Distribution[1][0] = totalSumW - sumsPerValue[i];
        sumsSquares[1] = totalSumSquaresW - sumsSquaresPerValue[i];
        sumOfWeights[1] = totalSumOfWeightsW - weightsPerValue[i];

        currVal = variance(m_Distribution, sumsSquares, sumOfWeights);

        if (currVal < bestVal) {
            bestVal = currVal;
            m_SplitPoint = (double) i;
            for (int j = 0; j < 3; j++) {
                if (sumOfWeights[j] > 0) {
                    bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                } else {
                    bestDist[j][0] = totalSum / totalSumOfWeights;
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and nominal class
 * and returns value./*from www  .  j  a v  a  2s. co m*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNominal(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sum = new double[m_Instances.numClasses()];
    double[][] bestDist = new double[3][m_Instances.numClasses()];

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][(int) inst.classValue()] += inst.weight();
        } else {
            m_Distribution[2][(int) inst.classValue()] += inst.weight();
            numMissing++;
        }
    }
    System.arraycopy(m_Distribution[1], 0, sum, 0, m_Instances.numClasses());

    // Save current distribution as best distribution
    for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
    }

    // Sort instances
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][(int) inst.classValue()] += inst.weight();
        m_Distribution[1][(int) inst.classValue()] -= inst.weight();
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
                }
            }
        }
    }

    // No missing values in training data.
    if (numMissing == 0) {
        System.arraycopy(sum, 0, bestDist[2], 0, m_Instances.numClasses());
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and numeric class
 * and returns value./*  www.ja va  2 s . c  om*/
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];
    double totalSum = 0, totalSumOfWeights = 0;

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][0] += inst.classValue() * inst.weight();
            sumsSquares[1] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[1] += inst.weight();
        } else {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
            numMissing++;
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Sort instances
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][0] += inst.classValue() * inst.weight();
        sumsSquares[0] += inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[0] += inst.weight();
        m_Distribution[1][0] -= inst.classValue() * inst.weight();
        sumsSquares[1] -= inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[1] -= inst.weight();
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = variance(m_Distribution, sumsSquares, sumOfWeights);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    if (sumOfWeights[j] > 0) {
                        bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                    } else {
                        bestDist[j][0] = totalSum / totalSumOfWeights;
                    }
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boostingPL.boosting.AdaBoost.java

License:Open Source License

/**
 * Runs boosting round {@code t}.
 *
 * <p>Builds a classifier obtained from
 * {@code ClassifierWritable.newInstance("DecisionStump")} on {@code insts},
 * stores {@code 0.5 * ln((1 - e) / e)} as its weight in
 * {@code cweights[t]} (where {@code e} is the value returned by
 * {@code weightError(t)}), and then rescales the instance weights:
 * misclassified instances are divided by {@code 2e}, correctly classified
 * ones by {@code 2(1 - e)}. Does nothing when {@code t} is not below
 * {@code numIterations}.
 *
 * @param t index of the boosting round
 * @throws Exception if the error of the round is 0.5 or higher, or if
 *         building or applying the classifier fails
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        // The message now matches the guard, which also rejects exactly 0.5.
        throw new Exception("error rate >= 0.5");
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0: it would make the log below infinite
    }
    // Math.log is already the natural logarithm, so no base conversion is needed.
    cweights[t] = 0.5 * Math.log((1 - e) / e);
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    // Increase the weight of misclassified instances, decrease the rest.
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}

From source file:boostingPL.boosting.SAMME.java

License:Open Source License

/**
 * Runs boosting round {@code t}.
 *
 * <p>Builds a classifier obtained from
 * {@code ClassifierWritable.newInstance("DecisionStump")} on {@code insts},
 * stores {@code ln((1 - e) / e) + ln(K - 1)} as its weight in
 * {@code cweights[t]} (where {@code e} is the value returned by
 * {@code weightError(t)} and {@code K} the number of class values),
 * multiplies the weight of every misclassified instance by
 * {@code exp(cweights[t])} and finally divides all instance weights by
 * their sum. Does nothing when {@code t} is not below
 * {@code numIterations}.
 *
 * @param t index of the boosting round
 * @throws Exception if the error of the round is {@code 1 - 1/K} or
 *         higher, or if building or applying the classifier fails
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        // The message now matches the guard, which also rejects exactly maxe.
        throw new Exception("error rate >= " + maxe);
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0: it would make the log below infinite
    }
    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    // Boost the weight of every misclassified instance.
    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    // Rescale so that the instance weights sum to one again.
    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }

}

From source file:br.com.ufu.lsi.rebfnetwork.RBFRegressor.java

License:Open Source License

/**
 * Computes the squared error for a single instance: the square of the
 * difference between {@code getOutput(outputs)} and the instance's class
 * value.
 *
 * @param outputs values passed on to {@code getOutput}
 * @param inst the instance whose class value is the target
 * @return the squared difference between prediction and class value
 */
protected double calculateError(double[] outputs, Instance inst) {

    final double predicted = getOutput(outputs);
    final double residual = predicted - inst.classValue();

    return residual * residual;
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFRegressor.java

License:Open Source License

/**
 * Adds one instance's contribution to the gradient entries of the output
 * layer (weights and bias) and computes the deltas that are propagated to
 * the hidden units.
 *
 * <p>{@code deltaHidden} is always reset to zero first. When the
 * difference between {@code getOutput(outputs)} and the instance's class
 * value lies within {@code m_tolerance} of zero, nothing else is changed.
 *
 * @param grad gradient array; the entries starting at
 *        {@code OFFSET_WEIGHTS} are incremented
 * @param inst the training instance
 * @param outputs per-unit values that are multiplied with the output delta
 * @param derivativesOutputs not used by this method
 * @param deltaHidden receives, per unit, the output delta multiplied by
 *        the corresponding entry of {@code m_RBFParameters}
 */
protected void updateGradient(double[] grad, Instance inst, double[] outputs, double[] derivativesOutputs,
        double[] deltaHidden) {

    // Start from a clean slate for the hidden-layer deltas.
    Arrays.fill(deltaHidden, 0.0);

    // Difference between prediction and target.
    final double outputDelta = getOutput(outputs) - inst.classValue();

    // Skip the update entirely if the difference is negligible.
    if (outputDelta >= -m_tolerance && outputDelta <= m_tolerance) {
        return;
    }

    // Position of the first output weight.
    final int weightsBase = OFFSET_WEIGHTS;

    // Deltas for the hidden units.
    for (int unit = 0; unit < m_numUnits; unit++) {
        deltaHidden[unit] += outputDelta * m_RBFParameters[weightsBase + unit];
    }

    // Gradient of the output weights.
    for (int unit = 0; unit < m_numUnits; unit++) {
        grad[weightsBase + unit] += outputDelta * outputs[unit];
    }

    // Gradient of the bias, stored right after the output weights.
    grad[weightsBase + m_numUnits] += outputDelta;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Recursively builds an Id3 tree for the given data.
 *
 * <p>A node that receives no instances becomes a leaf with a missing
 * class value and an all-zero distribution. Otherwise the attribute with
 * the highest information gain is selected; if that gain is zero the node
 * becomes a leaf holding the normalized class distribution and its most
 * frequent class, else one successor is built per attribute value.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // No instances have reached this node: leaf without a known class.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Pick the attribute with the largest information gain.
    double[] gainPerAttribute = new double[data.numAttributes()];
    for (Enumeration attributes = data.enumerateAttributes(); attributes.hasMoreElements();) {
        Attribute candidate = (Attribute) attributes.nextElement();
        gainPerAttribute[candidate.index()] = computeInfoGain(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainPerAttribute));

    // A positive gain means the node is split further.
    if (!Utils.eq(gainPerAttribute[m_Attribute.index()], 0)) {
        Instances[] subsets = splitData(data, m_Attribute);
        final int branchCount = m_Attribute.numValues();
        m_Successors = new Id3[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            m_Successors[branch] = new Id3();
            m_Successors[branch].makeTree(subsets[branch]);
        }
        return;
    }

    // Zero gain: turn the node into a leaf predicting the majority class.
    m_Attribute = null;
    m_Distribution = new double[data.numClasses()];
    for (Enumeration instances = data.enumerateInstances(); instances.hasMoreElements();) {
        Instance current = (Instance) instances.nextElement();
        m_Distribution[(int) current.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Computes the entropy (in bits) of a dataset's class distribution.
 *
 * <p>With class counts {@code c_j} and {@code n} instances the result is
 * {@code log2(n) - (sum_j c_j * log2(c_j)) / n}. An empty dataset has
 * entropy 0.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    final int instanceCount = data.numInstances();

    // Without this guard the formula below evaluates 0/0 and log2(0) for
    // an empty dataset and the method would return NaN.
    if (instanceCount == 0) {
        return 0;
    }

    // Count how many instances fall into each class.
    double[] classCounts = new double[data.numClasses()];
    for (Enumeration instEnum = data.enumerateInstances(); instEnum.hasMoreElements();) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }

    // Classes without instances contribute nothing (and log2(0) is undefined).
    double entropy = 0;
    for (double count : classCounts) {
        if (count > 0) {
            entropy -= count * Utils.log2(count);
        }
    }
    entropy /= (double) instanceCount;
    return entropy + Utils.log2(instanceCount);
}