List of usage examples for weka.core Utils gr
public staticboolean gr(double a, double b)
From source file:MultiClassClassifier.java
License:Open Source License
/** * Returns the distribution for an instance. * * @param inst the instance to get the distribution for * @return the distribution//www . j a v a 2 s . c o m * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance inst) throws Exception { if (m_Classifiers.length == 1) { return m_Classifiers[0].distributionForInstance(inst); } double[] probs = new double[inst.numClasses()]; if (m_Method == METHOD_1_AGAINST_1) { double[][] r = new double[inst.numClasses()][inst.numClasses()]; double[][] n = new double[inst.numClasses()][inst.numClasses()]; for (int i = 0; i < m_ClassFilters.length; i++) { if (m_Classifiers[i] != null) { Instance tempInst = (Instance) inst.copy(); tempInst.setDataset(m_TwoClassDataset); double[] current = m_Classifiers[i].distributionForInstance(tempInst); Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices()); range.setUpper(m_ClassAttribute.numValues()); int[] pair = range.getSelection(); if (m_pairwiseCoupling && inst.numClasses() > 2) { r[pair[0]][pair[1]] = current[0]; n[pair[0]][pair[1]] = m_SumOfWeights[i]; } else { if (current[0] > current[1]) { probs[pair[0]] += 1.0; } else { probs[pair[1]] += 1.0; } } } } if (m_pairwiseCoupling && inst.numClasses() > 2) { return pairwiseCoupling(n, r); } } else { // error correcting style methods for (int i = 0; i < m_ClassFilters.length; i++) { m_ClassFilters[i].input(inst); m_ClassFilters[i].batchFinished(); double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output()); //Calibrate the binary classifier scores for (int j = 0; j < m_ClassAttribute.numValues(); j++) { if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) { probs[j] += current[1]; } else { probs[j] += current[0]; } } } } if (Utils.gr(Utils.sum(probs), 0)) { Utils.normalize(probs); return probs; } else { return m_ZeroR.distributionForInstance(inst); } }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Outputs the performance statistics in summary form. Lists number (and * percentage) of instances classified correctly, incorrectly and * unclassified. Outputs the total number of instances classified, and the * number of instances (if any) that had no class value provided. * //from w w w . j a v a 2s. c o m * @param title the title for the statistics * @param printComplexityStatistics if true, complexity statistics are * returned as well * @return the summary as a String */ public String toSummaryString(String title, boolean printComplexityStatistics) { StringBuffer text = new StringBuffer(); if (printComplexityStatistics && m_NoPriors) { printComplexityStatistics = false; System.err.println("Priors disabled, cannot print complexity statistics!"); } text.append(title + "\n"); try { if (m_WithClass > 0) { if (m_ClassIsNominal) { text.append("Correctly Classified Instances "); text.append(Utils.doubleToString(correct(), 12, 4) + " " + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n"); text.append("Incorrectly Classified Instances "); text.append(Utils.doubleToString(incorrect(), 12, 4) + " " + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n"); text.append("Kappa statistic "); text.append(Utils.doubleToString(kappa(), 12, 4) + "\n"); if (m_CostMatrix != null) { text.append("Total Cost "); text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n"); text.append("Average Cost "); text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("K&B Relative Info Score "); text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n"); text.append("K&B Information Score "); text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits"); text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n"); } } else { text.append("Correlation coefficient "); text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("Class complexity | order 0 "); text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n"); text.append("Class complexity | scheme "); text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n"); text.append("Complexity improvement (Sf) "); text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n"); } text.append("Mean absolute error "); text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n"); text.append("Root mean squared error "); text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n"); if (!m_NoPriors) { text.append("Relative absolute error "); text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n"); text.append("Root relative squared error "); text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n"); } } if (Utils.gr(unclassified(), 0)) { text.append("UnClassified Instances "); text.append(Utils.doubleToString(unclassified(), 12, 4) + " " + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n"); } text.append("Total Number of Instances "); text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n"); if (m_MissingClass > 0) { text.append("Ignored Class Unknown Instances "); text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n"); } } catch (Exception ex) { // Should never occur since the class is known to be nominal // here System.err.println("Arggh - Must be a bug in Evaluation class"); } return text.toString(); }
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully//from w ww. jav a 2 s . c om */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false); } } }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/** * Outputs the performance statistics in summary form. Lists * number (and percentage) of instances classified correctly, * incorrectly and unclassified. Outputs the total number of * instances classified, and the number of instances (if any) * that had no class value provided. /*from w w w .j a v a 2 s . c o m*/ * * @param title the title for the statistics * @param printComplexityStatistics if true, complexity statistics are * returned as well * @return the summary as a String */ public String toSummaryString(String title, boolean printComplexityStatistics) { StringBuffer text = new StringBuffer(); if (printComplexityStatistics && m_NoPriors) { printComplexityStatistics = false; System.err.println("Priors disabled, cannot print complexity statistics!"); } text.append(title + "\n"); try { if (m_WithClass > 0) { if (m_ClassIsNominal) { text.append("Correctly Classified Instances "); text.append(Utils.doubleToString(correct(), 12, 4) + " " + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n"); text.append("Incorrectly Classified Instances "); text.append(Utils.doubleToString(incorrect(), 12, 4) + " " + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n"); text.append("Kappa statistic "); text.append(Utils.doubleToString(kappa(), 12, 4) + "\n"); if (m_CostMatrix != null) { text.append("Total Cost "); text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n"); text.append("Average Cost "); text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("K&B Relative Info Score "); text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n"); text.append("K&B Information Score "); text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits"); text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n"); } } else { text.append("Correlation coefficient "); text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("Class complexity | order 0 "); text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n"); text.append("Class complexity | scheme "); text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n"); text.append("Complexity improvement (Sf) "); text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n"); } text.append("Mean absolute error "); text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n"); text.append("Root mean squared error "); text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n"); if (!m_NoPriors) { text.append("Relative absolute error "); text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n"); text.append("Root relative squared error "); text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n"); } } if (Utils.gr(unclassified(), 0)) { text.append("UnClassified Instances "); text.append(Utils.doubleToString(unclassified(), 12, 4) + " " + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n"); } text.append("Total Number of Instances "); text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n"); if (m_MissingClass > 0) { text.append("Ignored Class Unknown Instances "); text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n"); } } catch (Exception ex) { // Should never occur since the class is known to be nominal // here System.err.println("Arggh - Must be a bug in Evaluation class"); } return text.toString(); }
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
/** * Checks if the given instance is compatible with this dataset. Only looks * at the size of the instance and the ranges of the values for nominal and * string attributes.//from w w w . jav a2 s . c om * * @param instance * the instance to check * @return true if the instance is compatible with the dataset */ public/* @pure@ */boolean checkInstance(ProcessInstance instance) { if (instance.numAttributes() != numAttributes()) { return false; } if (instance.numStrAttributes() != numStrAttributes()) { return false; } for (int i = 0; i < numAttributes(); i++) { if (instance.isMissing(i)) { continue; } else if (attribute(i).isNominal() || attribute(i).isString()) { if (!(Utils.eq(instance.value(i), (double) (int) instance.value(i)))) { return false; } else if (Utils.sm(instance.value(i), 0) || Utils.gr(instance.value(i), attribute(i).numValues())) { return false; } } } return true; }
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully/*from www . j a va 2 s.c om*/ */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i])); } } }
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/** * Computes chi-squared statistic for a contingency table. * * @param matrix the contigency table//from w ww. j a v a 2s. com * @param useYates is Yates' correction to be used? * @return the value of the chi-squared statistic */ public static double chiVal(double[][] matrix) { int df, nrows, ncols, row, col; double[] rtotal, ctotal; double expect = 0, chival = 0, n = 0; boolean yates = true; nrows = matrix.length; ncols = matrix[0].length; rtotal = new double[nrows]; ctotal = new double[ncols]; for (row = 0; row < nrows; row++) { for (col = 0; col < ncols; col++) { rtotal[row] += matrix[row][col]; ctotal[col] += matrix[row][col]; n += matrix[row][col]; } } df = (nrows - 1) * (ncols - 1); chival = 0.0; for (row = 0; row < nrows; row++) { if (Utils.gr(rtotal[row], 0)) { for (col = 0; col < ncols; col++) { if (Utils.gr(ctotal[col], 0)) { expect = (ctotal[col] * rtotal[row]) / n; if ((df >= 1) && expect > EXPECTED) { yates = false; } else if (df <= 0) { return 0; } else if ((df <= 1) && (expect <= EXPECTED)) { yates = true; } chival += chiCell(matrix[row][col], expect, yates); } } } } return chival; }
From source file:feature.InfoGainEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all * attributes that are numeric./*w ww .ja v a 2 s .c o m*/ * * @param data * set of instances serving as training data * @throws Exception * if the evaluator has not been generated successfully */ public double computeInfoGain(Instances data, int att) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att]) - ContingencyTables.entropyConditionedOnRows(counts[att])); return m_InfoGains[att]; }
From source file:feature.InfoGainEval.java
License:Open Source License
public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else {//from w w w .ja v a 2 s .c o m NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i]) - ContingencyTables.entropyConditionedOnRows(counts[i])); } } }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/** * Outputs the performance statistics in summary form. Lists * number (and percentage) of instances classified correctly, * incorrectly and unclassified. Outputs the total number of * instances classified, and the number of instances (if any) * that had no class value provided./* w w w . ja v a 2 s. c om*/ * * @param title the title for the statistics * @param printComplexityStatistics if true, complexity statistics are * returned as well * @return the summary as a String */ public String toSummaryString(String title, boolean printComplexityStatistics) { double mae, mad = 0; StringBuffer text = new StringBuffer(); text.append(title + "\n"); try { if (m_WithClass > 0) { if (m_ClassIsNominal) { text.append("Correctly Classified Instances "); text.append(Utils.doubleToString(correct(), 12, 4) + " " + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n"); text.append("Incorrectly Classified Instances "); text.append(Utils.doubleToString(incorrect(), 12, 4) + " " + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n"); text.append("Kappa statistic "); text.append(Utils.doubleToString(kappa(), 12, 4) + "\n"); if (m_CostMatrix != null) { text.append("Total Cost "); text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n"); text.append("Average Cost "); text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("K&B Relative Info Score "); text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n"); text.append("K&B Information Score "); text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits"); text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n"); } } else { text.append("Correlation coefficient "); text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) + "\n"); } if (printComplexityStatistics) { text.append("Class complexity | order 0 "); text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n"); text.append("Class complexity | scheme "); text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n"); text.append("Complexity improvement (Sf) "); text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits"); text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n"); } text.append("Mean absolute error "); text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n"); text.append("Root mean squared error "); text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n"); text.append("Relative absolute error "); text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4) + " %\n"); text.append("Root relative squared error "); text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4) + " %\n"); } if (Utils.gr(unclassified(), 0)) { text.append("UnClassified Instances "); text.append(Utils.doubleToString(unclassified(), 12, 4) + " " + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n"); } text.append("Total Number of Instances "); text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n"); if (m_MissingClass > 0) { text.append("Ignored Class Unknown Instances "); text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n"); } } catch (Exception ex) { // Should never occur since the class is known to be nominal // here System.err.println("Arggh - Must be a bug in Evaluation class"); } return text.toString(); }