List of usage examples for org.apache.commons.math.stat.inference ChiSquareTestImpl chiSquareTest
public double chiSquareTest(long[][] counts) throws IllegalArgumentException, MathException
From source file: rs.fon.whibo.GDT.component.removeInsignificantAttributes.ChiSquareTestCategorical.java
@Override public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) { // checks if the example set is pure, and if it is, it exits the method Attribute label = exampleSet.getAttributes().getLabel(); if (Tools.getAllCategories(exampleSet, label).size() < 2) return attributesForSplitting; // selects the attributes to be evaluated for removal (by calculating // chi-square probability for each attribute) ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>(); ArrayList<Double> attributeProbabilities = new ArrayList<Double>(); for (Attribute attr : attributesForSplitting) if (attr.isNominal()) { // calculate chi-square probability of the attribute double probability = 0; try { long[][] matrixForAttribute = getContigencyTable(exampleSet, attr); ChiSquareTestImpl chiTest = new ChiSquareTestImpl(); probability = chiTest.chiSquareTest(matrixForAttribute); } catch (MathException me) { // System.out.println("Error in calculating math formula (chiTest)"); }//from w ww . j a va 2s. c o m // add the attribute to the list attributesToRemove.add(attr); attributeProbabilities.add(new Double(probability)); } // calculates the percentile of the required percentage. 
Percentile // variable in code represents the percentage of attributes to be kept // (not removed) double percentile; DescriptiveStatistics stat = new DescriptiveStatistics(); for (Double d : attributeProbabilities) stat.addValue(d.doubleValue()); percentile = stat.getPercentile((1 - Percentage_Remove) * 100); // evaluates attributes and chooses the ones for removal (actually saves // the ones not for removal) Iterator<Attribute> iattr = attributesToRemove.iterator(); Iterator<Double> iprob = attributeProbabilities.iterator(); while (iattr.hasNext()) { iattr.next(); Double prob = iprob.next(); if (Use_Percentage_Instead == 0) { if (prob <= Alpha_Value) { iattr.remove(); iprob.remove(); } } else { if (prob <= percentile) { iattr.remove(); iprob.remove(); } } } // removes the attributes for (Attribute attr : attributesToRemove) attributesForSplitting.remove(attr); return attributesForSplitting; }