List of usage examples for weka.core ContingencyTables reduceMatrix
public static double[][] reduceMatrix(double[][] matrix)
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully//from w ww . j av a 2 s. c om */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false); } } }
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully//www . j av a 2s. c o m */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i])); } } }