Usage example for weka.core.ContingencyTables.entropy
public static double entropy(double[] array)
From source file:moa.reduction.bayes.PIDdiscretize.java
License:Open Source License
private double[] cutPointsForSubset(int attIndex, int first, int lastPlusOne) { //Map<Integer, Double> counts, bestCounts; double[] left, right, cutPoints; //double step = ((float) totalCount) / m_CutPointsL1.get(index).size(); double currentCutPoint = -Double.MAX_VALUE, bestCutPoint = -1, currentEntropy, bestEntropy, priorEntropy, gain;//from www . j a v a 2 s. c o m int bestIndex = -1, numCutPoints = 0; double numInstances = 0; // Compute number of instances in set if ((lastPlusOne - first) < 2) { return null; } // Get the greatest class observed till here int numClasses = 0; for (int i = first; i < lastPlusOne; i++) { Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i); for (Integer key : classDist.keySet()) { if (key > numClasses) { numClasses = key; } } } numClasses += 1; // Compute class counts. double[][] counts = new double[2][numClasses]; for (int i = first; i < lastPlusOne; i++) { Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i); for (Map.Entry<Integer, Float> entry : classDist.entrySet()) { counts[1][entry.getKey()] += entry.getValue(); numInstances += entry.getValue(); } } // Save prior counts double[] priorCounts = new double[numClasses]; System.arraycopy(counts[1], 0, priorCounts, 0, numClasses); // Entropy of the full set priorEntropy = ContingencyTables.entropy(priorCounts); bestEntropy = priorEntropy; priorEntropy = ContingencyTables.entropy(priorCounts); bestEntropy = priorEntropy; // Find best entropy. 
double[][] bestCounts = new double[2][numClasses]; for (int i = first; i < (lastPlusOne - 1); i++) { Map<Integer, Float> classDist = m_Distrib.get(attIndex).get(i); for (Map.Entry<Integer, Float> entry : classDist.entrySet()) { counts[0][entry.getKey()] += entry.getValue(); counts[1][entry.getKey()] -= entry.getValue(); } currentCutPoint = m_CutPointsL1.get(attIndex).get(i); currentEntropy = ContingencyTables.entropyConditionedOnRows(counts); if (currentEntropy < bestEntropy) { bestCutPoint = currentCutPoint; bestEntropy = currentEntropy; bestIndex = i; System.arraycopy(counts[0], 0, bestCounts[0], 0, numClasses); System.arraycopy(counts[1], 0, bestCounts[1], 0, numClasses); } numCutPoints++; } // Use worse encoding? if (!m_UseBetterEncoding) { numCutPoints = (lastPlusOne - first) - 1; } // Checks if gain is zero gain = priorEntropy - bestEntropy; if (gain <= 0) { return null; } // Check if split is to be accepted if (FayyadAndIranisMDL(priorCounts, bestCounts, numInstances, numCutPoints)) { // Select split points for the left and right subsets left = cutPointsForSubset(attIndex, first, bestIndex + 1); right = cutPointsForSubset(attIndex, bestIndex + 1, lastPlusOne); // Merge cut points and return them if ((left == null) && (right) == null) { cutPoints = new double[1]; cutPoints[0] = bestCutPoint; } else if (right == null) { cutPoints = new double[left.length + 1]; System.arraycopy(left, 0, cutPoints, 0, left.length); cutPoints[left.length] = bestCutPoint; } else if (left == null) { cutPoints = new double[1 + right.length]; cutPoints[0] = bestCutPoint; System.arraycopy(right, 0, cutPoints, 1, right.length); } else { cutPoints = new double[left.length + right.length + 1]; System.arraycopy(left, 0, cutPoints, 0, left.length); cutPoints[left.length] = bestCutPoint; System.arraycopy(right, 0, cutPoints, left.length + 1, right.length); } return cutPoints; } else { return null; } }