Example usage for weka.core Utils gr

List of usage examples for weka.core Utils gr

Introduction

In this page you can find the example usage for weka.core Utils gr.

Prototype

public static boolean gr(double a, double b) 

Source Link

Document

Tests if a is greater than b.

Usage

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Computes the class membership distribution of a single instance.
 *
 * With exactly one base classifier the call is delegated unchanged. In
 * 1-against-1 mode every non-null pairwise classifier either casts one vote
 * for the class it favours or, when pairwise coupling is enabled and there
 * are more than two classes, fills the matrices handed to pairwiseCoupling.
 * In every other mode the two-class outputs of all classifiers are summed
 * per class value. A vector whose sum is not greater than zero is replaced
 * by the prediction of m_ZeroR.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    final int numClasses = inst.numClasses();
    double[] probs = new double[numClasses];

    if (m_Method == METHOD_1_AGAINST_1) {
        // Coupling only applies when there are more than two classes
        final boolean coupled = m_pairwiseCoupling && numClasses > 2;
        double[][] r = new double[numClasses][numClasses];
        double[][] n = new double[numClasses][numClasses];

        for (int c = 0; c < m_ClassFilters.length; c++) {
            if (m_Classifiers[c] == null) {
                continue;
            }

            // Classify a copy that is attached to the two-class header
            Instance twoClassInst = (Instance) inst.copy();
            twoClassInst.setDataset(m_TwoClassDataset);
            double[] dist = m_Classifiers[c].distributionForInstance(twoClassInst);

            // Recover the two class indices this classifier separates
            RemoveWithValues pairFilter = (RemoveWithValues) m_ClassFilters[c];
            Range classes = new Range(pairFilter.getNominalIndices());
            classes.setUpper(m_ClassAttribute.numValues());
            int[] pair = classes.getSelection();

            if (coupled) {
                r[pair[0]][pair[1]] = dist[0];
                n[pair[0]][pair[1]] = m_SumOfWeights[c];
            } else {
                // Plain voting: the more probable class of the pair gets one vote
                int winner = dist[0] > dist[1] ? pair[0] : pair[1];
                probs[winner] += 1.0;
            }
        }

        if (coupled) {
            return pairwiseCoupling(n, r);
        }
    } else {
        // error correcting style methods
        for (int c = 0; c < m_ClassFilters.length; c++) {
            m_ClassFilters[c].input(inst);
            m_ClassFilters[c].batchFinished();
            Instance filtered = m_ClassFilters[c].output();
            double[] dist = m_Classifiers[c].distributionForInstance(filtered);

            // Class values inside the indicator range receive dist[1],
            // all remaining class values receive dist[0]
            for (int v = 0; v < m_ClassAttribute.numValues(); v++) {
                boolean inIndicatorRange = ((MakeIndicator) m_ClassFilters[c]).getValueRange().isInRange(v);
                probs[v] += inIndicatorRange ? dist[1] : dist[0];
            }
        }
    }

    // Nothing accumulated: fall back to the ZeroR model
    if (!Utils.gr(Utils.sum(probs), 0)) {
        return m_ZeroR.distributionForInstance(inst);
    }
    Utils.normalize(probs);
    return probs;
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Outputs the performance statistics in summary form. Lists number (and
 * percentage) of instances classified correctly, incorrectly and
 * unclassified. Outputs the total number of instances classified, and the
 * number of instances (if any) that had no class value provided.
 *
 * If complexity statistics are requested while priors are disabled
 * (m_NoPriors), the request is dropped and a notice is written to
 * System.err. If computing a statistic throws, the text assembled so far is
 * returned and the exception is reported on System.err.
 *
 * @param title the title for the statistics
 * @param printComplexityStatistics if true, complexity statistics are
 *          returned as well
 * @return the summary as a String
 */
public String toSummaryString(String title, boolean printComplexityStatistics) {

    // Purely local buffer, so the unsynchronized builder is sufficient
    StringBuilder text = new StringBuilder();

    if (printComplexityStatistics && m_NoPriors) {
        printComplexityStatistics = false;
        System.err.println("Priors disabled, cannot print complexity statistics!");
    }

    text.append(title + "\n");
    try {
        if (m_WithClass > 0) {
            if (m_ClassIsNominal) {

                text.append("Correctly Classified Instances     ");
                text.append(Utils.doubleToString(correct(), 12, 4) + "     "
                        + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n");
                text.append("Incorrectly Classified Instances   ");
                text.append(Utils.doubleToString(incorrect(), 12, 4) + "     "
                        + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n");
                text.append("Kappa statistic                    ");
                text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");

                // Cost figures are only available when a cost matrix was supplied
                if (m_CostMatrix != null) {
                    text.append("Total Cost                         ");
                    text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
                    text.append("Average Cost                       ");
                    text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
                }
                if (printComplexityStatistics) {
                    text.append("K&B Relative Info Score            ");
                    text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n");
                    text.append("K&B Information Score              ");
                    text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits");
                    text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n");
                }
            } else {
                // Non-nominal class: report the correlation instead of accuracy
                text.append("Correlation coefficient            ");
                text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n");
            }
            if (printComplexityStatistics) {
                text.append("Class complexity | order 0         ");
                text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n");
                text.append("Class complexity | scheme          ");
                text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n");
                text.append("Complexity improvement     (Sf)    ");
                text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n");
            }

            text.append("Mean absolute error                ");
            text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n");
            text.append("Root mean squared error            ");
            text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n");
            // Relative errors are skipped when priors are disabled
            if (!m_NoPriors) {
                text.append("Relative absolute error            ");
                text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n");
                text.append("Root relative squared error        ");
                text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n");
            }
        }
        if (Utils.gr(unclassified(), 0)) {
            text.append("UnClassified Instances             ");
            text.append(Utils.doubleToString(unclassified(), 12, 4) + "     "
                    + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n");
        }
        text.append("Total Number of Instances          ");
        text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
        if (m_MissingClass > 0) {
            text.append("Ignored Class Unknown Instances            ");
            text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
        }
    } catch (Exception ex) {
        // Not expected in practice; report the cause instead of discarding
        // it so that a failing statistic can actually be diagnosed
        System.err.println("Arggh - Must be a bug in Evaluation class");
        System.err.println(ex);
    }

    return text.toString();
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Before counting, the data is passed through the Discretize filter, or
 * through NumericToBinary when m_Binarize is set.
 *
 * For every non-class attribute a weighted (attribute value x class)
 * contingency table is built and its chi-squared value is stored in
 * m_ChiSquareds; the entry at the class index is left at 0.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    // Both values are read before filtering and reused on the filtered data
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    // Each table has one extra row (index numValues) for a missing attribute
    // value and one extra column (index numClasses) for a missing class
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    // temp holds the weighted class distribution of the whole data set,
    // with the weight of class-less instances in the last slot
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Row 0 of every table starts out with the full class distribution; the
    // loop below moves weight out of row 0 for each value an instance stores
    // (presumably so values omitted by sparse instances stay in row 0)
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value known, class missing -> extra column
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class known -> extra row
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // value and class both missing -> extra row, extra column
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // value and class both known -> regular cell
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                // (over the regular cells only, the extra row/column is excluded)
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                // Without any fully observed weight there is nothing to
                // distribute proportionally; the table is then left unchanged
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    // (missing-value weight, split by row share)
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    // (missing-class weight, split by column share)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    // (weight with both value and class missing, split by cell share)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    // (numValues x numClasses, i.e. without the extra row/column)
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    // NOTE(review): the 'false' argument presumably disables Yates'
    // correction in ContingencyTables.chiVal - confirm against its API
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Outputs the performance statistics in summary form. Lists 
 * number (and percentage) of instances classified correctly, 
 * incorrectly and unclassified. Outputs the total number of 
 * instances classified, and the number of instances (if any) 
 * that had no class value provided. /*from   w w w .j a v a  2  s .  c  o  m*/
 *
 * @param title the title for the statistics
 * @param printComplexityStatistics if true, complexity statistics are
 * returned as well
 * @return the summary as a String
 */
public String toSummaryString(String title, boolean printComplexityStatistics) {

    StringBuffer text = new StringBuffer();

    if (printComplexityStatistics && m_NoPriors) {
        printComplexityStatistics = false;
        System.err.println("Priors disabled, cannot print complexity statistics!");
    }

    text.append(title + "\n");
    try {
        if (m_WithClass > 0) {
            if (m_ClassIsNominal) {

                text.append("Correctly Classified Instances     ");
                text.append(Utils.doubleToString(correct(), 12, 4) + "     "
                        + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n");
                text.append("Incorrectly Classified Instances   ");
                text.append(Utils.doubleToString(incorrect(), 12, 4) + "     "
                        + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n");
                text.append("Kappa statistic                    ");
                text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");

                if (m_CostMatrix != null) {
                    text.append("Total Cost                         ");
                    text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
                    text.append("Average Cost                       ");
                    text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
                }
                if (printComplexityStatistics) {
                    text.append("K&B Relative Info Score            ");
                    text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n");
                    text.append("K&B Information Score              ");
                    text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits");
                    text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n");
                }
            } else {
                text.append("Correlation coefficient            ");
                text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n");
            }
            if (printComplexityStatistics) {
                text.append("Class complexity | order 0         ");
                text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n");
                text.append("Class complexity | scheme          ");
                text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n");
                text.append("Complexity improvement     (Sf)    ");
                text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n");
            }

            text.append("Mean absolute error                ");
            text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n");
            text.append("Root mean squared error            ");
            text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n");
            if (!m_NoPriors) {
                text.append("Relative absolute error            ");
                text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n");
                text.append("Root relative squared error        ");
                text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n");
            }
        }
        if (Utils.gr(unclassified(), 0)) {
            text.append("UnClassified Instances             ");
            text.append(Utils.doubleToString(unclassified(), 12, 4) + "     "
                    + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n");
        }
        text.append("Total Number of Instances          ");
        text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
        if (m_MissingClass > 0) {
            text.append("Ignored Class Unknown Instances            ");
            text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
        }
    } catch (Exception ex) {
        // Should never occur since the class is known to be nominal 
        // here
        System.err.println("Arggh - Must be a bug in Evaluation class");
    }

    return text.toString();
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java

License:Open Source License

/**
 * Tells whether an instance could belong to this dataset. The attribute
 * count and the string attribute count must match, and every non-missing
 * value of a nominal or string attribute must be a whole number that is
 * not smaller than 0 and not greater than the attribute's number of values.
 *
 * NOTE(review): a value exactly equal to numValues() passes the upper-bound
 * test; confirm whether that is intended.
 *
 * @param instance
 *            the instance to check
 * @return true if the instance is compatible with the dataset
 */
public/* @pure@ */boolean checkInstance(ProcessInstance instance) {

    boolean sameShape = instance.numAttributes() == numAttributes()
            && instance.numStrAttributes() == numStrAttributes();
    if (!sameShape) {
        return false;
    }

    for (int idx = 0; idx < numAttributes(); idx++) {
        // Missing values are always acceptable
        if (instance.isMissing(idx)) {
            continue;
        }
        // Only nominal and string attributes store an index to validate
        if (!(attribute(idx).isNominal() || attribute(idx).isString())) {
            continue;
        }

        double stored = instance.value(idx);
        boolean wholeNumber = Utils.eq(stored, (double) (int) stored);
        if (!wholeNumber) {
            return false;
        }
        boolean outOfRange = Utils.sm(stored, 0) || Utils.gr(stored, attribute(idx).numValues());
        if (outOfRange) {
            return false;
        }
    }
    return true;
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully/*from  www . j a  va 2 s.c  om*/
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Computes chi-squared statistic for a contingency table.
 *
 * The statistic is the sum of chiCell over all cells whose row total and
 * column total are both greater than zero. For each such cell the expected
 * count decides the Yates flag passed to chiCell: it is cleared when the
 * expected count exceeds EXPECTED and set when the table has exactly one
 * degree of freedom and the expected count does not exceed EXPECTED.
 *
 * NOTE(review): with more than one degree of freedom a cell whose expected
 * count does not exceed EXPECTED reuses the flag left by the previously
 * visited cell (initially true); confirm that this carry-over is intended.
 *
 * @param matrix the contingency table
 * @return the value of the chi-squared statistic; 0 if the table is empty
 *         or has no degrees of freedom
 */
public static double chiVal(double[][] matrix) {

    // An empty table has no first row to read the column count from
    if (matrix.length == 0) {
        return 0;
    }

    int nrows = matrix.length;
    int ncols = matrix[0].length;

    // The degrees of freedom do not depend on the cell, so decide once
    // instead of re-testing inside the innermost loop
    int df = (nrows - 1) * (ncols - 1);
    if (df <= 0) {
        return 0;
    }

    // Row totals, column totals and grand total
    double[] rtotal = new double[nrows];
    double[] ctotal = new double[ncols];
    double n = 0;
    for (int row = 0; row < nrows; row++) {
        for (int col = 0; col < ncols; col++) {
            rtotal[row] += matrix[row][col];
            ctotal[col] += matrix[row][col];
            n += matrix[row][col];
        }
    }

    double chival = 0.0;
    boolean yates = true;
    for (int row = 0; row < nrows; row++) {
        if (!Utils.gr(rtotal[row], 0)) {
            continue;
        }
        for (int col = 0; col < ncols; col++) {
            if (!Utils.gr(ctotal[col], 0)) {
                continue;
            }
            double expect = (ctotal[col] * rtotal[row]) / n;

            // df >= 1 is guaranteed at this point
            if (expect > EXPECTED) {
                yates = false;
            } else if ((df == 1) && (expect <= EXPECTED)) {
                yates = true;
            }

            chival += chiCell(matrix[row][col], expect, yates);
        }
    }
    return chival;
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Discretizes all
 * attributes that are numeric./*w  ww .ja v a 2 s .c  o  m*/
 *
 * @param data
 *            set of instances serving as training data
 * @throws Exception
 *             if the evaluator has not been generated successfully
 */
public double computeInfoGain(Instances data, int att) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));

    return m_InfoGains[att];
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Builds the evaluator: filters the data, tabulates weighted
 * attribute-value/class counts for every non-class attribute and stores
 * the resulting information gain of each attribute in {@code m_InfoGains}.
 *
 * @param data the instances to compute the information gains from
 * @throws Exception propagated from the capability test, the filters or
 * the entropy computations
 */
public void buildEvaluator(Instances data) throws Exception {

    // Reject data this evaluator cannot handle before doing any work
    getCapabilities().testWithFail(data);

    // Both values are read from the data as passed in, i.e. before filtering
    final int classIndex = data.classIndex();
    final int numInstances = data.numInstances();

    // Preprocess: binarize when requested, otherwise discretize
    if (m_Binarize) {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    } else {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    }
    final int numAttributes = data.numAttributes();
    final int numClasses = data.attribute(classIndex).numValues();

    // Weighted class distribution; the extra last slot collects the
    // instances whose class value is missing
    double[] classTotals = new double[numClasses + 1];
    for (int n = 0; n < numInstances; n++) {
        Instance inst = data.instance(n);
        int slot = inst.classIsMissing() ? numClasses : (int) inst.classValue();
        classTotals[slot] += inst.weight();
    }

    // One table per non-class attribute with an extra row and column for
    // missing values. Row 0 starts out holding the complete class
    // distribution; the counting loop below moves weight out of it for
    // every value that is explicitly stored in an instance.
    double[][][] counts = new double[numAttributes][][];
    for (int a = 0; a < numAttributes; a++) {
        if (a == classIndex) {
            continue;
        }
        counts[a] = new double[data.attribute(a).numValues() + 1][numClasses + 1];
        System.arraycopy(classTotals, 0, counts[a][0], 0, classTotals.length);
    }

    // Get counts: only the stored (sparse) values of each instance are visited
    for (int n = 0; n < numInstances; n++) {
        Instance inst = data.instance(n);
        for (int s = 0; s < inst.numValues(); s++) {
            int attIndex = inst.index(s);
            if (attIndex == classIndex) {
                continue;
            }
            double weight = inst.weight();
            // A missing attribute value goes to the extra last row, a
            // missing class value to the extra last column
            int row = inst.isMissingSparse(s)
                    ? data.attribute(attIndex).numValues()
                    : (int) inst.valueSparse(s);
            int column = inst.classIsMissing() ? numClasses : (int) inst.classValue();

            counts[attIndex][row][column] += weight;
            counts[attIndex][0][column] -= weight;
        }
    }

    // Distribute missing counts if required
    if (m_missing_merge) {
        for (int a = 0; a < numAttributes; a++) {
            if (a == classIndex) {
                continue;
            }
            int numValues = data.attribute(a).numValues();
            double[][] table = counts[a];

            // Marginals over the non-missing part of the table
            double[] rowSums = new double[numValues];
            double[] columnSums = new double[numClasses];
            double sum = 0;
            for (int v = 0; v < numValues; v++) {
                for (int c = 0; c < numClasses; c++) {
                    double cell = table[v][c];
                    rowSums[v] += cell;
                    columnSums[c] += cell;
                }
                sum += rowSums[v];
            }

            // Nothing to distribute over when there is no non-missing weight
            if (!Utils.gr(sum, 0)) {
                continue;
            }

            // New contingency table without the missing row/column: each
            // cell receives its proportional share of the missing-value
            // row, the missing-class column and the both-missing corner
            double[][] merged = new double[numValues][numClasses];
            for (int v = 0; v < numValues; v++) {
                for (int c = 0; c < numClasses; c++) {
                    double extra = (rowSums[v] / sum) * table[numValues][c];
                    extra += (columnSums[c] / sum) * table[v][numClasses];
                    extra += (table[v][c] / sum) * table[numValues][numClasses];
                    merged[v][c] = table[v][c] + extra;
                }
            }
            counts[a] = merged;
        }
    }

    // Compute info gains; the entry of the class attribute stays at 0
    m_InfoGains = new double[numAttributes];
    for (int a = 0; a < numAttributes; a++) {
        if (a == classIndex) {
            continue;
        }
        m_InfoGains[a] = (ContingencyTables.entropyOverColumns(counts[a])
                - ContingencyTables.entropyConditionedOnRows(counts[a]));
    }
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Outputs the performance statistics in summary form. Lists
 * number (and percentage) of instances classified correctly,
 * incorrectly and unclassified. Outputs the total number of
 * instances classified, and the number of instances (if any)
 * that had no class value provided./*  w  w w  . ja v a 2  s. c om*/
 *
 * @param title the title for the statistics
 * @param printComplexityStatistics if true, complexity statistics are
 * returned as well
 * @return the summary as a String
 */
public String toSummaryString(String title, boolean printComplexityStatistics) {

    double mae, mad = 0;
    StringBuffer text = new StringBuffer();

    text.append(title + "\n");
    try {
        if (m_WithClass > 0) {
            if (m_ClassIsNominal) {

                text.append("Correctly Classified Instances     ");
                text.append(Utils.doubleToString(correct(), 12, 4) + "     "
                        + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n");
                text.append("Incorrectly Classified Instances   ");
                text.append(Utils.doubleToString(incorrect(), 12, 4) + "     "
                        + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n");
                text.append("Kappa statistic                    ");
                text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");

                if (m_CostMatrix != null) {
                    text.append("Total Cost                         ");
                    text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
                    text.append("Average Cost                       ");
                    text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
                }
                if (printComplexityStatistics) {
                    text.append("K&B Relative Info Score            ");
                    text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n");
                    text.append("K&B Information Score              ");
                    text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits");
                    text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n");
                }
            } else {
                text.append("Correlation coefficient            ");
                text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n");
            }
            if (printComplexityStatistics) {
                text.append("Class complexity | order 0         ");
                text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n");
                text.append("Class complexity | scheme          ");
                text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n");
                text.append("Complexity improvement     (Sf)    ");
                text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
                text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n");
            }

            text.append("Mean absolute error                ");
            text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n");
            text.append("Root mean squared error            ");
            text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n");
            text.append("Relative absolute error            ");
            text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n");
            text.append("Root relative squared error        ");
            text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n");
        }
        if (Utils.gr(unclassified(), 0)) {
            text.append("UnClassified Instances             ");
            text.append(Utils.doubleToString(unclassified(), 12, 4) + "     "
                    + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n");
        }
        text.append("Total Number of Instances          ");
        text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
        if (m_MissingClass > 0) {
            text.append("Ignored Class Unknown Instances            ");
            text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
        }
    } catch (Exception ex) {
        // Should never occur since the class is known to be nominal
        // here
        System.err.println("Arggh - Must be a bug in Evaluation class");
    }

    return text.toString();
}