Example usage for weka.core ContingencyTables entropy

List of usage examples for weka.core ContingencyTables entropy

Introduction

On this page you can find example usage for weka.core.ContingencyTables.entropy.

Prototype

public static double entropy(double[] array) 

Source Link

Document

Computes the entropy of the given array.

Usage

From source file: moa.reduction.bayes.PIDdiscretize.java

License:Open Source License

/**
 * Recursively selects cut points for one attribute over the index range
 * {@code [first, lastPlusOne)}.
 *
 * <p>For every index {@code i} in the range, {@code m_Distrib.get(attIndex).get(i)} is read
 * as a map from class index to accumulated weight, and
 * {@code m_CutPointsL1.get(attIndex).get(i)} as the candidate cut point at that index.
 * The candidate that minimises the class entropy conditioned on the resulting two-way
 * split is chosen; if {@code FayyadAndIranisMDL} accepts it, both halves are processed
 * recursively and the results are merged in ascending index order.
 *
 * @param attIndex    index of the attribute whose distributions and cut points are read
 * @param first       first index of the range (inclusive)
 * @param lastPlusOne end of the range (exclusive)
 * @return the selected cut points (left subset, best cut point, right subset), or
 *         {@code null} when the range holds fewer than two entries, no candidate lowers
 *         the entropy, or the best split is rejected by {@code FayyadAndIranisMDL}
 */
private double[] cutPointsForSubset(int attIndex, int first, int lastPlusOne) {

    // A range with fewer than two entries cannot be split.
    if ((lastPlusOne - first) < 2) {
        return null;
    }

    // The largest class index observed in the range fixes the size of the count arrays.
    int numClasses = 0;
    for (int i = first; i < lastPlusOne; i++) {
        Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i);
        for (Integer key : classDist.keySet()) {
            if (key > numClasses) {
                numClasses = key;
            }
        }
    }
    numClasses += 1;

    // counts[0] = class weights left of the candidate split, counts[1] = right of it.
    // Initially everything is on the right-hand side.
    double[][] counts = new double[2][numClasses];
    double numInstances = 0;
    for (int i = first; i < lastPlusOne; i++) {
        Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i);
        for (Map.Entry<Integer, Float> entry : classDist.entrySet()) {
            counts[1][entry.getKey()] += entry.getValue();
            numInstances += entry.getValue();
        }
    }

    // Snapshot of the unsplit class counts; counts[1] is mutated below.
    double[] priorCounts = new double[numClasses];
    System.arraycopy(counts[1], 0, priorCounts, 0, numClasses);

    // Entropy of the full set; a split must beat this to be considered at all.
    double priorEntropy = ContingencyTables.entropy(priorCounts);
    double bestEntropy = priorEntropy;

    // Sweep the split position left to right, moving one entry's weights at a time
    // from the right side to the left side, and remember the best split seen.
    double bestCutPoint = -1;
    int bestIndex = -1;
    int numCutPoints = 0;
    double[][] bestCounts = new double[2][numClasses];
    for (int i = first; i < (lastPlusOne - 1); i++) {
        Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i);
        for (Map.Entry<Integer, Float> entry : classDist.entrySet()) {
            counts[0][entry.getKey()] += entry.getValue();
            counts[1][entry.getKey()] -= entry.getValue();
        }
        double currentCutPoint = m_CutPointsL1.get(attIndex).get(i);
        double currentEntropy = ContingencyTables.entropyConditionedOnRows(counts);
        if (currentEntropy < bestEntropy) {
            bestCutPoint = currentCutPoint;
            bestEntropy = currentEntropy;
            bestIndex = i;
            System.arraycopy(counts[0], 0, bestCounts[0], 0, numClasses);
            System.arraycopy(counts[1], 0, bestCounts[1], 0, numClasses);
        }
        numCutPoints++;
    }

    // Use worse encoding? (The loop above counts every index, so here both
    // encodings end up with the same number of candidate cut points.)
    if (!m_UseBetterEncoding) {
        numCutPoints = (lastPlusOne - first) - 1;
    }

    // No candidate improved on the prior entropy: nothing to split on.
    double gain = priorEntropy - bestEntropy;
    if (gain <= 0) {
        return null;
    }

    // Check if split is to be accepted
    if (!FayyadAndIranisMDL(priorCounts, bestCounts, numInstances, numCutPoints)) {
        return null;
    }

    // Select split points for the left and right subsets
    double[] left = cutPointsForSubset(attIndex, first, bestIndex + 1);
    double[] right = cutPointsForSubset(attIndex, bestIndex + 1, lastPlusOne);

    // Merge as [left..., bestCutPoint, right...]; a null side contributes nothing.
    int leftLength = (left == null) ? 0 : left.length;
    int rightLength = (right == null) ? 0 : right.length;
    double[] cutPoints = new double[leftLength + 1 + rightLength];
    if (left != null) {
        System.arraycopy(left, 0, cutPoints, 0, leftLength);
    }
    cutPoints[leftLength] = bestCutPoint;
    if (right != null) {
        System.arraycopy(right, 0, cutPoints, leftLength + 1, rightLength);
    }
    return cutPoints;
}