List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getPercentile
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException
From source file:streaming.core.WindowOperation.java
@Override public Object[][] process(Object[] event) { long day = (Long) event[0]; String word = (String) event[1]; long freqs = (Long) event[2]; TreeMap<Long, Long> sortedFreq = map.get(word); if (sortedFreq == null) { sortedFreq = new TreeMap<Long, Long>(); map.put(word, sortedFreq);// w w w .j a va2 s . c o m } Long t = sortedFreq.get(day); if (t != null) { freqs = freqs + t; } sortedFreq.put(day, freqs); Iterator<Entry<Long, Long>> iterator = sortedFreq.headMap(1 + day - numberOfDays).entrySet().iterator(); while (iterator.hasNext()) { iterator.next(); iterator.remove(); } DescriptiveStatistics stats = new DescriptiveStatistics(); long dayIndex = 1 + day - numberOfDays; for (Entry<Long, Long> e : sortedFreq.entrySet()) { while (e.getKey() > dayIndex) { dayIndex++; stats.addValue(0); } stats.addValue(e.getValue()); } if (sortedFreq.size() > numberOfDays) { System.out.println(day + " size=" + sortedFreq.size() + " " + sortedFreq); } double mean = stats.getMean(); double meadian = stats.getPercentile(50); mean = (mean == 0) ? 1 : mean; meadian = (meadian == 0) ? 1 : meadian; double stddev = stats.getStandardDeviation(); stddev = (stddev == 0) ? 1 : stddev; double cov = stddev / mean; //double swna = Math.log(freqs)*freqs/stats.getMean(); double swna1 = Math.log(meadian) * Math.abs(freqs - meadian) / stddev; if (Double.isNaN(swna1)) { System.out.println(); } double swna2 = Math.abs(freqs - meadian) / stddev; double swna3 = freqs / (meadian * cov); Gaussian gaussian = new Gaussian(100, 50); double swna4 = (0.1 + 100 * gaussian.value(meadian)) * freqs / (meadian * cov); int percentageAvialableValues = Math.round(100 * sortedFreq.size() / numberOfDays); //System.out.println("#"+ word + " " + freqs + " "+ stats.getMean() + Arrays.toString(stats.getValues())); return new Object[][] { { day, word, swna1, freqs, stats.getMean(), meadian, stddev, swna2, swna3, swna4, cov, percentageAvialableValues } }; // if(freqs > 3 && swna> 5){ // return new Object[][]{{day, word, swna}}; // }else{ // return null; // } }
From source file:tools.descartes.bungee.utils.DiffUtil.java
public static <T> StatsPercentile statisticsForDiffs(List<T> objects, double percent) { // Get a DescriptiveStatistics instance DescriptiveStatistics statistics = new DescriptiveStatistics(); // Add the data from the array for (T object : objects) { if (object instanceof Long) { statistics.addValue((Long) object); } else if (object instanceof Double) { statistics.addValue((Double) object); } else if (object instanceof AbstractResponse) { statistics.addValue(((AbstractResponse) object).getResponseTime()); }//from w ww .ja va 2 s . c om } StatsPercentile stats = new StatsPercentile(); // Compute some statistics stats.mean = statistics.getMean(); stats.std = statistics.getStandardDeviation(); stats.max = Math.max(statistics.getMax(), -statistics.getMin()); stats.percent = percent; stats.percentile = statistics.getPercentile(percent); return stats; }
From source file:uk.ac.diamond.scisoft.analysis.diffraction.PowderRingsUtils.java
/** * Find major axes by looking along thick line given by relative coordinates to centre for * maximum intensity values//from w w w .j a v a2 s . c o m * @param mon * @param axes * @param image * @param mask * @param roi * @param offset minimum position of peaks * @param width of line * @param centre * @param dx * @param dy */ private static void findMajorAxes(IMonitor mon, TreeSet<Double> axes, Dataset image, Dataset mask, EllipticalROI roi, double offset, double width, double[] centre, double dx, double dy) { RectangularROI rroi = new RectangularROI(); rroi.setPoint(centre); rroi.setAngle(Math.atan2(dy, dx)); rroi.setLengths(Math.hypot(dx, dy), width); rroi.translate(0, -0.5); rroi.setClippingCompensation(true); Dataset profile = ROIProfile.maxInBox(image, mask, rroi)[0]; List<IdentifiedPeak> peaks = Generic1DFitter .findPeaks(DatasetFactory.createRange(profile.getSize(), Dataset.INT), profile, PEAK_SMOOTHING); if (mon != null) mon.worked(profile.getSize()); System.err.printf("\n"); DescriptiveStatistics stats = new DescriptiveStatistics(); int[] pb = new int[1]; int[] pe = new int[1]; for (IdentifiedPeak p : peaks) { if (p.getPos() < offset) { continue; } pb[0] = (int) p.getMinXVal(); pe[0] = (int) p.getMaxXVal(); p.setArea((Double) profile.getSlice(pb, pe, null).sum()); stats.addValue(p.getArea()); System.err.printf("P %f A %f W %f H %f\n", p.getPos(), p.getArea(), p.getFWHM(), p.getHeight()); } double area = stats.getMean() + 0.4 * (stats.getPercentile(75) - stats.getPercentile(25)); logger.debug("Area: {}", stats); logger.debug("Minimum threshold: {}", area); double majorFactor = roi.getSemiAxis(0) / roi.getDistance(rroi.getAngle()); double maxFWHM = MAX_FWHM_FACTOR * width; for (IdentifiedPeak p : peaks) { double l = p.getPos(); if (l < offset) { continue; } // System.err.println(p); // filter on area and FWHM if (p.getFWHM() > maxFWHM) { continue; } if (p.getArea() < area) { break; } axes.add(l * majorFactor); } if (mon != null) mon.worked(peaks.size()); }
From source file:util.Statistics.java
public Statistics(List<Integer> list) { scores = intsToDoubles(list);// w ww . java2 s. c o m DescriptiveStatistics dStats = new DescriptiveStatistics(scores); summaryStatistics.put("min", dStats.getMin()); // Minimum summaryStatistics.put("q1", dStats.getPercentile(25)); // Lower Quartile (Q1) summaryStatistics.put("q2", dStats.getPercentile(50)); // Middle Quartile (Median - Q2) summaryStatistics.put("q3", dStats.getPercentile(75)); // High Quartile (Q3) summaryStatistics.put("max", dStats.getMax()); // Maxiumum summaryStatistics.put("mean", dStats.getMean()); // Mean summaryStatistics.put("sd", dStats.getStandardDeviation()); // Standard Deviation EmpiricalDistribution distribution = new EmpiricalDistribution(NUM_BINS); distribution.load(scores); List<SummaryStatistics> binStats = distribution.getBinStats(); double[] upperBounds = distribution.getUpperBounds(); Double lastUpperBound = upperBounds[0]; bins.add(new Pair<Pair<Double, Double>, Long>( new Pair<Double, Double>(summaryStatistics.get("min"), lastUpperBound), binStats.get(0).getN())); for (int i = 1; i < binStats.size(); i++) { bins.add(new Pair<Pair<Double, Double>, Long>(new Pair<Double, Double>(lastUpperBound, upperBounds[i]), binStats.get(i).getN())); lastUpperBound = upperBounds[i]; } if (list.size() > 5 && dStats.getStandardDeviation() > 0) // Only remove outliers if relatively normal { double mean = dStats.getMean(); double stDev = dStats.getStandardDeviation(); NormalDistribution normalDistribution = new NormalDistribution(mean, stDev); Iterator<Integer> listIterator = list.iterator(); double significanceLevel = .50 / list.size(); // Chauvenet's Criterion for Outliers while (listIterator.hasNext()) { int num = listIterator.next(); double pValue = normalDistribution.cumulativeProbability(num); if (pValue < significanceLevel) { outliers.add(num); listIterator.remove(); } } if (list.size() != dStats.getN()) // If and only if outliers have been removed { double[] significantData = intsToDoubles(list); dStats = new DescriptiveStatistics(significantData); summaryStatistics.put("min", dStats.getMin()); summaryStatistics.put("max", dStats.getMax()); summaryStatistics.put("mean", dStats.getMean()); summaryStatistics.put("sd", dStats.getStandardDeviation()); } } }
From source file:weka.classifiers.timeseries.eval.TSEvaluation.java
public double CalcOutlierTreshold(DescriptiveStatistics MAPEstats) throws Exception { double tresh = 0; mape = MAPEstats.getMean();//result[i]; std = MAPEstats.getStandardDeviation(); median = MAPEstats.getPercentile(50); upperPercentile = MAPEstats.getPercentile(85); if (readTreshsFromFile == false) { if (mape < 1) { if (std < 5) { tresh = upperPercentile + 3 * std + mape + median; ;/*from www.j av a 2s . com*/ } else if ((std >= 5) && (std < 10)) { tresh = upperPercentile + 1.5 * std + mape + median; } else if ((std >= 10)) { tresh = upperPercentile + 1 * std + mape + median; } } else if ((mape >= 1) && (mape < 2)) { if (std < 5) { tresh = upperPercentile + 3 * std + mape + median; ; } else if ((std >= 5) && (std < 10)) { tresh = upperPercentile + 2.5 * std + mape + median; } else if ((std >= 10)) { tresh = upperPercentile + 1 * std + mape + median; } } else if ((mape >= 2) && (mape < 5)) { if (std < 5) { tresh = 2 * upperPercentile + std + mape + 1.25 * median; ; } else if ((std >= 5) && (std < 10)) { tresh = 1 * upperPercentile + std + mape + 2.75 * median; } else if ((std >= 10)) { tresh = 0.5 * upperPercentile + std + mape + median; } } else if ((mape >= 5) && (mape < 10)) { if (std < 5) { tresh = 1.5 * upperPercentile + std + mape + 1.25 * median; } else if ((std >= 5) && (std < 7.5)) { tresh = 0.85 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 7.5) && (std < 10)) { tresh = 0.85 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 10)) { tresh = 0.85 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 10) && (mape < 15)) { if (std < 5) { tresh = 0.75 * upperPercentile + std + mape + 2 * median; } else if ((std >= 5) && (std < 10)) { tresh = 0.5 * upperPercentile + std + mape + 1.65 * median; } else if ((std >= 10) && (std < 15)) { tresh = 2 * upperPercentile + std + mape + 2.5 * median; } else if ((std >= 15) && (std < 20)) { tresh = 2 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { tresh = 0.5 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 15) && (mape < 20)) { if (std < 10) { tresh = 1 * upperPercentile + std + mape + 2 * median; } else if ((std >= 10) && (std < 15)) { tresh = 1 * upperPercentile + std + mape + 3 * median; } else if ((std >= 15) && (std < 20)) { tresh = 0.5 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { tresh = 1 * upperPercentile + std + mape + 2 * median; } } else if ((mape >= 20) && (mape < 30)) { if (std < 10) { tresh = 1 * upperPercentile + std + mape + 2 * median; } else if ((std >= 10) && (std < 20)) { tresh = 1.25 * upperPercentile + std + mape + 2 * median; } else if ((std >= 20)) { tresh = 1 * upperPercentile + std + mape + 2 * median; } //if (tresh>(3.5*mape)) // tresh=3.5*mape; } else { if (std < 10) { tresh = 1 * upperPercentile + std + mape + median; } else if ((std >= 10) && (std < 20)) { tresh = 1 * upperPercentile + std + mape + median; } else if ((std >= 20)) { tresh = 0.5 * upperPercentile + std + mape + median; } //if (tresh>(3*mape)) // tresh=3*mape; } } else { tresh = treshold; } return tresh; /* if (upperPercentile>25) factor=3; else if ((upperPercentile>15)&&(upperPercentile>25)) factor=2; else{ factor=1.5; } if (mape<15){ if ((mape<4)&&(median<=2)&&(std>7)){ tresh= 4*factor*upperPercentile+2*std+mape+median;//+ } else if ((median<4.0)&&(std<=7)){ tresh= factor*upperPercentile+std+mape+median;//+ } else if ((mape<10)&&(std>12)){ tresh= factor*upperPercentile+mape+std; } else if ((mape<10)){ tresh= upperPercentile+std+2*median;//factor*upperPercentile;//+2*median } else { tresh =factor*upperPercentile+mape;//2.5*(std+mape);//+median if (std>=10) tresh=tresh+std; } if (tresh<1) tresh=2*tresh; } else{ tresh =upperPercentile+2*(mape+median)+(std/2); if (tresh>(3.5*mape)) tresh=3.5*mape; } */ // return tresh; }