List of usage examples for org.apache.commons.math.stat.inference OneWayAnova anovaPValue
double anovaPValue(Collection<double[]> categoryData) throws IllegalArgumentException, MathException;
double[] arrays. From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.FTestNumerical.java
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) { // checks if the example set is pure, and if it is, it exits the method Attribute label = exampleSet.getAttributes().getLabel(); if (Tools.getAllCategories(exampleSet, label).size() < 2) return attributesForSplitting; // selects the attributes to be evaluated for removal (by calculating // F-test probability for each attribute) ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>(); ArrayList<Double> attributeProbabilities = new ArrayList<Double>(); for (Attribute attr : attributesForSplitting) if (attr.isNumerical()) { // calculate F-test probability of the attribute double probability = 0; try { OneWayAnova fTest = new OneWayAnovaImpl(); List<double[]> paramForFTest = getArraysByLabel(exampleSet, attr); // tests if no arrays for f-test has fewer that 2 elements boolean fTestImpossible = false; for (double[] i : paramForFTest) if (i.length < 2) fTestImpossible = true; // calculates ftest probability if (!fTestImpossible) probability = fTest.anovaPValue(paramForFTest); } catch (Exception e) { // System.out.println("Error in calculating math formula (FTest)"); }// w w w. j a v a 2s.c o m // add the attribute to the list attributesToRemove.add(attr); attributeProbabilities.add(new Double(probability)); } if (attributesToRemove.size() == 0) return attributesForSplitting; // calculates the percentile of the required percentage. Percentile // variable in code represents the percentage of attributes to be kept // (not removed) double percentile; DescriptiveStatistics stat = new DescriptiveStatistics(); for (Double d : attributeProbabilities) stat.addValue(d.doubleValue()); percentile = stat.getPercentile((1 - Percentage_Remove) * 100); // evaluates attributes and chooses the ones for removal (actually saves // the ones not for removal) Iterator<Attribute> iattr = attributesToRemove.iterator(); Iterator<Double> iprob = attributeProbabilities.iterator(); while (iattr.hasNext()) { iattr.next(); Double prob = iprob.next(); if (Use_Percentage_Instead == 0) { if (prob <= Alpha_Value) { iattr.remove(); iprob.remove(); } } else { if (prob <= percentile) { iattr.remove(); iprob.remove(); } } } // removes the attributes for (Attribute attr : attributesToRemove) attributesForSplitting.remove(attr); return attributesForSplitting; }