Example usage for weka.core ContingencyTables gainRatio

List of usage examples for weka.core ContingencyTables gainRatio

Introduction

On this page you can find example usage of weka.core ContingencyTables gainRatio.

Prototype

public static double gainRatio(double[][] matrix) 

Source Link

Document

Computes gain ratio for contingency table (split on rows).

Usage

From source file:GainRatioAttributeEval1.java

License:Open Source License

/**
 * evaluates an individual attribute by measuring the gain ratio
 * of the class given the attribute./*from   w w w  .j a va2  s. com*/
 *
 * @param attribute the index of the attribute to be evaluated
 * @return the gain ratio
 * @throws Exception if the attribute could not be evaluated
 */
public double evaluateAttribute(int attribute) throws Exception {
    int i, j, ii, jj;
    int ni, nj;
    double sum = 0.0;
    ni = m_trainInstances.attribute(attribute).numValues() + 1;
    nj = m_numClasses + 1;
    double[] sumi, sumj;
    Instance inst;
    double temp = 0.0;
    sumi = new double[ni];
    sumj = new double[nj];
    double[][] counts = new double[ni][nj];
    sumi = new double[ni];
    sumj = new double[nj];

    for (i = 0; i < ni; i++) {
        sumi[i] = 0.0;

        for (j = 0; j < nj; j++) {
            sumj[j] = 0.0;
            counts[i][j] = 0.0;
        }
    }

    // Fill the contingency table
    for (i = 0; i < m_numInstances; i++) {
        inst = m_trainInstances.instance(i);

        if (inst.isMissing(attribute)) {
            ii = ni - 1;
        } else {
            ii = (int) inst.value(attribute);
        }

        if (inst.isMissing(m_classIndex)) {
            jj = nj - 1;
        } else {
            jj = (int) inst.value(m_classIndex);
        }

        counts[ii][jj]++;
    }

    // get the row totals
    for (i = 0; i < ni; i++) {
        sumi[i] = 0.0;

        for (j = 0; j < nj; j++) {
            sumi[i] += counts[i][j];
            sum += counts[i][j];
        }
    }

    // get the column totals
    for (j = 0; j < nj; j++) {
        sumj[j] = 0.0;

        for (i = 0; i < ni; i++) {
            sumj[j] += counts[i][j];
        }
    }

    // distribute missing counts
    if (m_missing_merge && (sumi[ni - 1] < m_numInstances) && (sumj[nj - 1] < m_numInstances)) {
        double[] i_copy = new double[sumi.length];
        double[] j_copy = new double[sumj.length];
        double[][] counts_copy = new double[sumi.length][sumj.length];

        for (i = 0; i < ni; i++) {
            System.arraycopy(counts[i], 0, counts_copy[i], 0, sumj.length);
        }

        System.arraycopy(sumi, 0, i_copy, 0, sumi.length);
        System.arraycopy(sumj, 0, j_copy, 0, sumj.length);
        double total_missing = (sumi[ni - 1] + sumj[nj - 1] - counts[ni - 1][nj - 1]);

        // do the missing i's
        if (sumi[ni - 1] > 0.0) {
            for (j = 0; j < nj - 1; j++) {
                if (counts[ni - 1][j] > 0.0) {
                    for (i = 0; i < ni - 1; i++) {
                        temp = ((i_copy[i] / (sum - i_copy[ni - 1])) * counts[ni - 1][j]);
                        counts[i][j] += temp;
                        sumi[i] += temp;
                    }

                    counts[ni - 1][j] = 0.0;
                }
            }
        }

        sumi[ni - 1] = 0.0;

        // do the missing j's
        if (sumj[nj - 1] > 0.0) {
            for (i = 0; i < ni - 1; i++) {
                if (counts[i][nj - 1] > 0.0) {
                    for (j = 0; j < nj - 1; j++) {
                        temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]);
                        counts[i][j] += temp;
                        sumj[j] += temp;
                    }

                    counts[i][nj - 1] = 0.0;
                }
            }
        }

        sumj[nj - 1] = 0.0;

        // do the both missing
        if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) {
            for (i = 0; i < ni - 1; i++) {
                for (j = 0; j < nj - 1; j++) {
                    temp = (counts_copy[i][j] / (sum - total_missing)) * counts_copy[ni - 1][nj - 1];
                    counts[i][j] += temp;
                    sumi[i] += temp;
                    sumj[j] += temp;
                }
            }

            counts[ni - 1][nj - 1] = 0.0;
        }
    }

    return ContingencyTables.gainRatio(counts);
}