List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getStandardDeviation()
public double getStandardDeviation()
From source file:util.Statistics.java
public Statistics(List<Integer> list) { scores = intsToDoubles(list);// w w w. j a va 2s . co m DescriptiveStatistics dStats = new DescriptiveStatistics(scores); summaryStatistics.put("min", dStats.getMin()); // Minimum summaryStatistics.put("q1", dStats.getPercentile(25)); // Lower Quartile (Q1) summaryStatistics.put("q2", dStats.getPercentile(50)); // Middle Quartile (Median - Q2) summaryStatistics.put("q3", dStats.getPercentile(75)); // High Quartile (Q3) summaryStatistics.put("max", dStats.getMax()); // Maxiumum summaryStatistics.put("mean", dStats.getMean()); // Mean summaryStatistics.put("sd", dStats.getStandardDeviation()); // Standard Deviation EmpiricalDistribution distribution = new EmpiricalDistribution(NUM_BINS); distribution.load(scores); List<SummaryStatistics> binStats = distribution.getBinStats(); double[] upperBounds = distribution.getUpperBounds(); Double lastUpperBound = upperBounds[0]; bins.add(new Pair<Pair<Double, Double>, Long>( new Pair<Double, Double>(summaryStatistics.get("min"), lastUpperBound), binStats.get(0).getN())); for (int i = 1; i < binStats.size(); i++) { bins.add(new Pair<Pair<Double, Double>, Long>(new Pair<Double, Double>(lastUpperBound, upperBounds[i]), binStats.get(i).getN())); lastUpperBound = upperBounds[i]; } if (list.size() > 5 && dStats.getStandardDeviation() > 0) // Only remove outliers if relatively normal { double mean = dStats.getMean(); double stDev = dStats.getStandardDeviation(); NormalDistribution normalDistribution = new NormalDistribution(mean, stDev); Iterator<Integer> listIterator = list.iterator(); double significanceLevel = .50 / list.size(); // Chauvenet's Criterion for Outliers while (listIterator.hasNext()) { int num = listIterator.next(); double pValue = normalDistribution.cumulativeProbability(num); if (pValue < significanceLevel) { outliers.add(num); listIterator.remove(); } } if (list.size() != dStats.getN()) // If and only if outliers have been removed { double[] significantData = 
intsToDoubles(list); dStats = new DescriptiveStatistics(significantData); summaryStatistics.put("min", dStats.getMin()); summaryStatistics.put("max", dStats.getMax()); summaryStatistics.put("mean", dStats.getMean()); summaryStatistics.put("sd", dStats.getStandardDeviation()); } } }
From source file:weka.classifiers.timeseries.eval.TSEvaluation.java
public double CalcOutlierTreshold(DescriptiveStatistics MAPEstats) throws Exception { double tresh = 0; mape = MAPEstats.getMean();//result[i]; std = MAPEstats.getStandardDeviation(); median = MAPEstats.getPercentile(50); upperPercentile = MAPEstats.getPercentile(85); if (readTreshsFromFile == false) { if (mape < 1) { if (std < 5) { tresh = upperPercentile + 3 * std + mape + median; ;//from w w w . j a v a 2 s . com } else if ((std >= 5) && (std < 10)) { tresh = upperPercentile + 1.5 * std + mape + median; } else if ((std >= 10)) { tresh = upperPercentile + 1 * std + mape + median; } } else if ((mape >= 1) && (mape < 2)) { if (std < 5) { tresh = upperPercentile + 3 * std + mape + median; ; } else if ((std >= 5) && (std < 10)) { tresh = upperPercentile + 2.5 * std + mape + median; } else if ((std >= 10)) { tresh = upperPercentile + 1 * std + mape + median; } } else if ((mape >= 2) && (mape < 5)) { if (std < 5) { tresh = 2 * upperPercentile + std + mape + 1.25 * median; ; } else if ((std >= 5) && (std < 10)) { tresh = 1 * upperPercentile + std + mape + 2.75 * median; } else if ((std >= 10)) { tresh = 0.5 * upperPercentile + std + mape + median; } } else if ((mape >= 5) && (mape < 10)) { if (std < 5) { tresh = 1.5 * upperPercentile + std + mape + 1.25 * median; } else if ((std >= 5) && (std < 7.5)) { tresh = 0.85 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 7.5) && (std < 10)) { tresh = 0.85 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 10)) { tresh = 0.85 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 10) && (mape < 15)) { if (std < 5) { tresh = 0.75 * upperPercentile + std + mape + 2 * median; } else if ((std >= 5) && (std < 10)) { tresh = 0.5 * upperPercentile + std + mape + 1.65 * median; } else if ((std >= 10) && (std < 15)) { tresh = 2 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 15) && (std < 20)) { tresh = 2 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { 
tresh = 0.5 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 15) && (mape < 20)) { if (std < 10) { tresh = 1 * upperPercentile + std + mape + 2 * median; } else if ((std >= 10) && (std < 15)) { tresh = 1 * upperPercentile + std + mape + 3 * median; } else if ((std >= 15) && (std < 20)) { tresh = 0.5 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { tresh = 1 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 20) && (mape < 30)) { if (std < 10) { tresh = 1 * upperPercentile + std + mape + 2 * median; } else if ((std >= 10) && (std < 20)) { tresh = 1.25 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { tresh = 1 * upperPercentile + std + mape + 2 * median; } //if (tresh>(3.5*mape)) // tresh=3.5*mape; } else { if (std < 10) { tresh = 1 * upperPercentile + std + mape + median; } else if ((std >= 10) && (std < 20)) { tresh = 1 * upperPercentile + std + mape + median; } else if ((std >= 20)) { tresh = 0.5 * upperPercentile + std + mape + median; } //if (tresh>(3*mape)) // tresh=3*mape; } } else { tresh = treshold; } return tresh; /* if (upperPercentile>25) factor=3; else if ((upperPercentile>15)&&(upperPercentile>25)) factor=2; else{ factor=1.5; } if (mape<15){ if ((mape<4)&&(median<=2)&&(std>7)){ tresh= 4*factor*upperPercentile+2*std+mape+median;//+ } else if ((median<4.0)&&(std<=7)){ tresh= factor*upperPercentile+std+mape+median;//+ } else if ((mape<10)&&(std>12)){ tresh= factor*upperPercentile+mape+std; } else if ((mape<10)){ tresh= upperPercentile+std+2*median;//factor*upperPercentile;//+2*median } else { tresh =factor*upperPercentile+mape;//2.5*(std+mape);//+median if (std>=10) tresh=tresh+std; } if (tresh<1) tresh=2*tresh; } else{ tresh =upperPercentile+2*(mape+median)+(std/2); if (tresh>(3.5*mape)) tresh=3.5*mape; } */ // return tresh; }