Example usage for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getStandardDeviation()

List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getStandardDeviation()

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getStandardDeviation().

Prototype

public double getStandardDeviation() 

Source Link

Document

Returns the standard deviation of the available values.

Usage

From source file:util.Statistics.java

/**
 * Builds summary statistics, a histogram and an outlier list for the given scores.
 *
 * <p>The constructor:
 * <ol>
 *   <li>converts {@code list} to doubles and stores min, quartiles (q1/q2/q3), max, mean and
 *       standard deviation in {@code summaryStatistics};</li>
 *   <li>loads the scores into an {@code EmpiricalDistribution} with {@code NUM_BINS} bins and
 *       records each bin as a ((lower bound, upper bound), count) pair in {@code bins};</li>
 *   <li>when there are more than five scores and their standard deviation is positive, applies
 *       Chauvenet's criterion, moves rejected values into {@code outliers}, and refreshes
 *       min, max, mean and sd from the remaining values.</li>
 * </ol>
 *
 * <p>The quartile entries and the histogram bins are always computed from the full data set;
 * they are not recomputed after outliers are removed.
 *
 * <p><b>Side effect:</b> rejected values are removed from {@code list} itself through its
 * iterator, so the supplied list must support {@code Iterator.remove()}.
 *
 * @param list the integer scores to analyse; modified in place when outliers are found
 */
public Statistics(List<Integer> list) {
    scores = intsToDoubles(list);
    DescriptiveStatistics dStats = new DescriptiveStatistics(scores);

    summaryStatistics.put("min", dStats.getMin()); // Minimum
    summaryStatistics.put("q1", dStats.getPercentile(25)); // Lower Quartile (Q1)
    summaryStatistics.put("q2", dStats.getPercentile(50)); // Middle Quartile (Median - Q2)
    summaryStatistics.put("q3", dStats.getPercentile(75)); // High Quartile (Q3)
    summaryStatistics.put("max", dStats.getMax()); // Maximum

    summaryStatistics.put("mean", dStats.getMean()); // Mean
    summaryStatistics.put("sd", dStats.getStandardDeviation()); // Standard Deviation

    // Histogram: bin counts and upper bounds come from the empirical distribution.
    // NOTE(review): behaviour for an empty input list is not visible here — confirm callers
    // never pass one, since the first bin is read unconditionally below.
    EmpiricalDistribution distribution = new EmpiricalDistribution(NUM_BINS);
    distribution.load(scores);
    List<SummaryStatistics> binStats = distribution.getBinStats();
    double[] upperBounds = distribution.getUpperBounds();

    // The first bin starts at the overall minimum; every later bin starts where the
    // previous one ended.
    double lastUpperBound = upperBounds[0];
    bins.add(new Pair<Pair<Double, Double>, Long>(
            new Pair<Double, Double>(summaryStatistics.get("min"), lastUpperBound), binStats.get(0).getN()));
    for (int i = 1; i < binStats.size(); i++) {
        bins.add(new Pair<Pair<Double, Double>, Long>(new Pair<Double, Double>(lastUpperBound, upperBounds[i]),
                binStats.get(i).getN()));
        lastUpperBound = upperBounds[i];
    }

    // Outlier screening needs more than five samples and a strictly positive spread
    // (the normal model below is built from the sample mean and standard deviation).
    final double stDev = dStats.getStandardDeviation();
    if (list.size() > 5 && stDev > 0) {
        NormalDistribution normalDistribution = new NormalDistribution(dStats.getMean(), stDev);

        // Chauvenet's criterion: reject a value when the probability of a deviation at least
        // as large as its own, in EITHER direction, is below 1/(2n). The level is fixed from
        // the original sample size before any removal takes place.
        final double significanceLevel = .50 / list.size();
        Iterator<Integer> listIterator = list.iterator();
        while (listIterator.hasNext()) {
            int num = listIterator.next();
            double cdf = normalDistribution.cumulativeProbability(num);
            // Two-sided tail probability. Testing only the lower tail (cdf < level) would
            // never flag abnormally high values, because their cdf approaches 1.
            double twoSidedTail = 2 * Math.min(cdf, 1 - cdf);
            if (twoSidedTail < significanceLevel) {
                outliers.add(num);
                listIterator.remove();
            }
        }

        if (list.size() != dStats.getN()) // If and only if outliers have been removed
        {
            // Refresh the figures that are sensitive to extreme values.
            double[] significantData = intsToDoubles(list);
            dStats = new DescriptiveStatistics(significantData);

            summaryStatistics.put("min", dStats.getMin());
            summaryStatistics.put("max", dStats.getMax());
            summaryStatistics.put("mean", dStats.getMean());
            summaryStatistics.put("sd", dStats.getStandardDeviation());
        }
    }
}

From source file:weka.classifiers.timeseries.eval.TSEvaluation.java

/**
 * Computes the outlier threshold for a collection of MAPE values.
 *
 * <p>The mean, standard deviation, median (50th percentile) and 85th percentile of
 * {@code MAPEstats} are first stored in the {@code mape}, {@code std}, {@code median} and
 * {@code upperPercentile} fields. If {@code readTreshsFromFile} is set, the preconfigured
 * {@code treshold} field is returned unchanged. Otherwise the threshold is
 *
 * <pre>
 *   wPercentile * upperPercentile + wStd * std + mape + wMedian * median
 * </pre>
 *
 * where the three weights are looked up from a table indexed first by the band the mean
 * falls into and then by the band the standard deviation falls into.
 *
 * <p>If the standard deviation matches no band (i.e. it is NaN), the result is {@code 0}.
 *
 * @param MAPEstats statistics over the MAPE values to derive the threshold from
 * @return the outlier threshold, the configured {@code treshold}, or {@code 0} as described above
 * @throws Exception declared by the signature; this body contains no explicit {@code throw}
 */
public double CalcOutlierTreshold(DescriptiveStatistics MAPEstats) throws Exception {
    // Publish the summary figures on the instance before anything else.
    mape = MAPEstats.getMean();
    std = MAPEstats.getStandardDeviation();
    median = MAPEstats.getPercentile(50);
    upperPercentile = MAPEstats.getPercentile(85);

    if (readTreshsFromFile) {
        return treshold;
    }

    // For the selected mean band: ascending std cut-offs, plus one weight row per std band
    // (one more row than cut-offs). Each row is {percentile weight, std weight, median weight}.
    final double[] stdCutoffs;
    final double[][] weightRows;

    if (mape < 1) {
        stdCutoffs = new double[] { 5, 10 };
        weightRows = new double[][] { { 1, 3, 1 }, { 1, 1.5, 1 }, { 1, 1, 1 } };
    } else if (mape < 2) {
        stdCutoffs = new double[] { 5, 10 };
        weightRows = new double[][] { { 1, 3, 1 }, { 1, 2.5, 1 }, { 1, 1, 1 } };
    } else if (mape < 5) {
        stdCutoffs = new double[] { 5, 10 };
        weightRows = new double[][] { { 2, 1, 1.25 }, { 1, 1, 2.75 }, { 0.5, 1, 1 } };
    } else if (mape < 10) {
        stdCutoffs = new double[] { 5, 7.5, 10 };
        weightRows = new double[][] { { 1.5, 1, 1.25 }, { 0.85, 1, 2.5 }, { 0.85, 1, 2.5 }, { 0.85, 1, 2 } };
    } else if (mape < 15) {
        stdCutoffs = new double[] { 5, 10, 15, 20 };
        weightRows = new double[][] { { 0.75, 1, 2 }, { 0.5, 1, 1.65 }, { 2, 1, 2.5 }, { 2, 1, 2 },
                { 0.5, 1, 2 } };
    } else if (mape < 20) {
        stdCutoffs = new double[] { 10, 15, 20 };
        weightRows = new double[][] { { 1, 1, 2 }, { 1, 1, 3 }, { 0.5, 1, 2 }, { 1, 1, 2 } };
    } else if (mape < 30) {
        // An earlier revision capped this band's result at 3.5 * mape; the cap is disabled.
        stdCutoffs = new double[] { 10, 20 };
        weightRows = new double[][] { { 1, 1, 2 }, { 1.25, 1, 2 }, { 1, 1, 2 } };
    } else {
        // Reached for mape >= 30 and also when mape is NaN.
        // An earlier revision capped this band's result at 3 * mape; the cap is disabled.
        stdCutoffs = new double[] { 10, 20 };
        weightRows = new double[][] { { 1, 1, 1 }, { 1, 1, 1 }, { 0.5, 1, 1 } };
    }

    final double[] weights = pickWeightsByStd(std, stdCutoffs, weightRows);
    if (weights == null) {
        return 0;
    }
    return weights[0] * upperPercentile + weights[1] * std + mape + weights[2] * median;
}

/**
 * Selects the weight row for the band that {@code spread} falls into.
 *
 * @param spread  the standard deviation to classify
 * @param cutoffs ascending band boundaries; band {@code i} is "below {@code cutoffs[i]}"
 * @param rows    one row per band, with a final row for values at or above the last cut-off
 * @return the matching row, or {@code null} when {@code spread} compares false against
 *         every boundary (NaN)
 */
private static double[] pickWeightsByStd(double spread, double[] cutoffs, double[][] rows) {
    for (int band = 0; band < cutoffs.length; band++) {
        if (spread < cutoffs[band]) {
            return rows[band];
        }
    }
    if (spread >= cutoffs[cutoffs.length - 1]) {
        return rows[cutoffs.length];
    }
    return null;
}