Example usage for org.apache.commons.math.stat.inference ChiSquareTestImpl chiSquareTest

List of usage examples for org.apache.commons.math.stat.inference ChiSquareTestImpl chiSquareTest

Introduction

In this page you can find the example usage for org.apache.commons.math.stat.inference ChiSquareTestImpl chiSquareTest.

Prototype

public double chiSquareTest(long[][] counts) throws IllegalArgumentException, MathException 

Source Link

Usage

From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.ChiSquareTestCategorical.java

@Override
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet,
        LinkedList<Attribute> attributesForSplitting) {

    // checks if the example set is pure, and if it is, it exits the method
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // selects the attributes to be evaluated for removal (by calculating
    // chi-square probability for each attribute)
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNominal()) {
            // calculate chi-square probability of the attribute
            double probability = 0;
            try {
                long[][] matrixForAttribute = getContigencyTable(exampleSet, attr);
                ChiSquareTestImpl chiTest = new ChiSquareTestImpl();
                probability = chiTest.chiSquareTest(matrixForAttribute);
            } catch (MathException me) {
                // System.out.println("Error in calculating math formula (chiTest)");
            }//from   w ww  . j a  va  2s. c  o m
            // add the attribute to the list
            attributesToRemove.add(attr);
            attributeProbabilities.add(new Double(probability));
        }

    // calculates the percentile of the required percentage. Percentile
    // variable in code represents the percentage of attributes to be kept
    // (not removed)
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // evaluates attributes and chooses the ones for removal (actually saves
    // the ones not for removal)
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // removes the attributes
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}