Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getPercentile

List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getPercentile

Introduction

On this page you can find example usages of the getPercentile method of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.

Prototype

public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException 

Source Link

Document

Returns an estimate for the pth percentile of the stored values.

Usage

From source file:streaming.core.WindowOperation.java

/**
 * Folds one event into the per-word sliding window of daily frequencies and scores the word's
 * frequency for that day against the statistics of the window.
 *
 * <p>The event is read as {@code event[0]} = day ({@code Long}), {@code event[1]} = word
 * ({@code String}) and {@code event[2]} = frequency ({@code Long}). Frequencies reported for the
 * same word and day are summed. Days older than {@code 1 + day - numberOfDays} are evicted, and
 * days inside the window that have no entry are counted as a frequency of zero.
 *
 * @param event the incoming event, laid out as described above
 * @return a single row holding, in order: day, word, swna1, accumulated frequency, raw window
 *         mean, median, standard deviation, swna2, swna3, swna4, coefficient of variation and the
 *         percentage of window days that have a stored value
 */
@Override
public Object[][] process(Object[] event) {
    long day = (Long) event[0];
    String word = (String) event[1];
    long freqs = (Long) event[2];

    TreeMap<Long, Long> sortedFreq = map.get(word);
    if (sortedFreq == null) {
        sortedFreq = new TreeMap<Long, Long>();
        map.put(word, sortedFreq);
    }

    // Several events may arrive for the same day: accumulate them.
    Long alreadyCounted = sortedFreq.get(day);
    if (alreadyCounted != null) {
        freqs = freqs + alreadyCounted;
    }
    sortedFreq.put(day, freqs);

    // Evict every day that has fallen out of the window (keys strictly below windowStart).
    long windowStart = 1 + day - numberOfDays;
    sortedFreq.headMap(windowStart).clear();

    // Feed the window into the statistics, padding each missing day with a zero.
    DescriptiveStatistics stats = new DescriptiveStatistics();
    long expectedDay = windowStart;
    for (Entry<Long, Long> e : sortedFreq.entrySet()) {
        while (e.getKey() > expectedDay) {
            stats.addValue(0);
            expectedDay++;
        }
        stats.addValue(e.getValue());
        // Move past the day just consumed; without this every consecutive day would be
        // preceded by a spurious zero sample.
        expectedDay = e.getKey() + 1;
    }

    if (sortedFreq.size() > numberOfDays) {
        System.out.println(day + " size=" + sortedFreq.size() + " " + sortedFreq);
    }

    // Zero statistics are replaced by 1 so that they can be used as divisors / log arguments.
    double rawMean = stats.getMean();
    double mean = (rawMean == 0) ? 1 : rawMean;
    double median = stats.getPercentile(50);
    median = (median == 0) ? 1 : median;
    double stddev = stats.getStandardDeviation();
    stddev = (stddev == 0) ? 1 : stddev;
    double cov = stddev / mean;

    double swna1 = Math.log(median) * Math.abs(freqs - median) / stddev;
    if (Double.isNaN(swna1)) {
        // Kept from the original: emits an empty line when the score is NaN.
        System.out.println();
    }
    double swna2 = Math.abs(freqs - median) / stddev;
    double swna3 = freqs / (median * cov);

    Gaussian gaussian = new Gaussian(100, 50);
    double swna4 = (0.1 + 100 * gaussian.value(median)) * freqs / (median * cov);

    // Floating-point division so that Math.round really rounds instead of receiving a
    // value already truncated by integer division.
    int percentageAvailableValues = (int) Math.round(100.0 * sortedFreq.size() / numberOfDays);

    return new Object[][] { { day, word, swna1, freqs, rawMean, median, stddev, swna2, swna3, swna4,
            cov, percentageAvailableValues } };
}

From source file:tools.descartes.bungee.utils.DiffUtil.java

/**
 * Summarises a list of samples: mean, standard deviation, largest absolute value and the
 * requested percentile.
 *
 * <p>Each element contributes one sample: an {@code AbstractResponse} contributes its response
 * time, any {@link Number} (e.g. {@code Long}, {@code Double}, {@code Integer}) contributes its
 * {@code double} value. Elements of any other type are skipped.
 *
 * @param objects the samples to summarise
 * @param percent the percentile to compute, passed unchanged to
 *                {@code DescriptiveStatistics.getPercentile}
 * @param <T> element type of the sample list
 * @return a {@code StatsPercentile} with {@code mean}, {@code std}, {@code max},
 *         {@code percent} and {@code percentile} filled in
 */
public static <T> StatsPercentile statisticsForDiffs(List<T> objects, double percent) {
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    for (T object : objects) {
        if (object instanceof AbstractResponse) {
            statistics.addValue(((AbstractResponse) object).getResponseTime());
        } else if (object instanceof Number) {
            // Covers Long and Double as before, plus Integer, Float, Short, ... which
            // used to be dropped silently.
            statistics.addValue(((Number) object).doubleValue());
        }
    }

    StatsPercentile stats = new StatsPercentile();
    stats.mean = statistics.getMean();
    stats.std = statistics.getStandardDeviation();
    // Largest magnitude among the samples: the bigger of max and -min.
    stats.max = Math.max(statistics.getMax(), -statistics.getMin());
    stats.percent = percent;
    stats.percentile = statistics.getPercentile(percent);
    return stats;
}

From source file:uk.ac.diamond.scisoft.analysis.diffraction.PowderRingsUtils.java

/**
 * Find major axes by looking along a thick line, given by coordinates relative to the centre,
 * for maximum intensity values. Peaks found in the maximum-intensity profile of that line are
 * filtered by position, FWHM and area; the position of each surviving peak, scaled by the ratio
 * of the ROI's semi-axis 0 to its distance at the line's angle, is added to {@code axes}.
 *
 * @param mon progress monitor; may be null, in which case no progress is reported
 * @param axes set that receives the scaled peak positions
 * @param image dataset the profile is taken from
 * @param mask mask handed to the profile computation together with the image
 * @param roi elliptical region used to scale peak positions
 * @param offset minimum position of peaks
 * @param width width of the line
 * @param centre start point of the line
 * @param dx x component of the line direction and length
 * @param dy y component of the line direction and length
 */
private static void findMajorAxes(IMonitor mon, TreeSet<Double> axes, Dataset image, Dataset mask,
        EllipticalROI roi, double offset, double width, double[] centre, double dx, double dy) {
    // Thick line from the centre along (dx, dy), expressed as a rotated rectangle.
    final RectangularROI line = new RectangularROI();
    line.setPoint(centre);
    line.setAngle(Math.atan2(dy, dx));
    line.setLengths(Math.hypot(dx, dy), width);
    line.translate(0, -0.5);
    line.setClippingCompensation(true);

    final Dataset profile = ROIProfile.maxInBox(image, mask, line)[0];
    final List<IdentifiedPeak> peaks = Generic1DFitter
            .findPeaks(DatasetFactory.createRange(profile.getSize(), Dataset.INT), profile, PEAK_SMOOTHING);
    if (mon != null) {
        mon.worked(profile.getSize());
    }

    // First pass: give every peak at or beyond the offset an area (sum of the profile over its
    // x extent) and gather those areas.
    System.err.printf("\n");
    final DescriptiveStatistics areaStats = new DescriptiveStatistics();
    final int[] sliceStart = new int[1];
    final int[] sliceStop = new int[1];
    for (IdentifiedPeak peak : peaks) {
        if (peak.getPos() < offset) {
            continue;
        }
        sliceStart[0] = (int) peak.getMinXVal();
        sliceStop[0] = (int) peak.getMaxXVal();
        peak.setArea((Double) profile.getSlice(sliceStart, sliceStop, null).sum());
        areaStats.addValue(peak.getArea());
        System.err.printf("P %f A %f W %f H %f\n", peak.getPos(), peak.getArea(), peak.getFWHM(),
                peak.getHeight());
    }

    // Area threshold: mean plus 0.4 times the interquartile range.
    final double meanArea = areaStats.getMean();
    final double upperQuartile = areaStats.getPercentile(75);
    final double lowerQuartile = areaStats.getPercentile(25);
    final double minArea = meanArea + 0.4 * (upperQuartile - lowerQuartile);
    logger.debug("Area: {}", areaStats);
    logger.debug("Minimum threshold: {}", minArea);

    // Second pass: keep peaks that are far enough out, narrow enough and large enough.
    final double majorFactor = roi.getSemiAxis(0) / roi.getDistance(line.getAngle());
    final double widestAllowed = MAX_FWHM_FACTOR * width;
    for (IdentifiedPeak peak : peaks) {
        final double position = peak.getPos();
        if (position < offset || peak.getFWHM() > widestAllowed) {
            continue;
        }
        // NOTE(review): stopping (rather than skipping) at the first too-small peak presumably
        // relies on the order in which findPeaks returns peaks - confirm.
        if (peak.getArea() < minArea) {
            break;
        }
        axes.add(position * majorFactor);
    }
    if (mon != null) {
        mon.worked(peaks.size());
    }
}

From source file:util.Statistics.java

/**
 * Builds summary statistics, histogram bins and the outlier list for a list of integer scores.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>min, quartiles (q1, q2, q3), max, mean and standard deviation of all scores are stored in
 *     {@code summaryStatistics};</li>
 * <li>the scores are loaded into an {@code EmpiricalDistribution} with {@code NUM_BINS} bins and
 *     one (lower bound, upper bound) / count pair per bin is appended to {@code bins};</li>
 * <li>if there are more than five scores and their standard deviation is positive, scores lying
 *     in either tail of the fitted normal distribution with a tail probability below
 *     {@code 0.5 / n} are moved to {@code outliers}; min, max, mean and sd are then recomputed
 *     from the remaining scores.</li>
 * </ol>
 *
 * <p><b>Side effect:</b> detected outliers are removed from the {@code list} passed in.
 *
 * @param list the scores; modified in place when outliers are found
 */
public Statistics(List<Integer> list) {
    scores = intsToDoubles(list);
    DescriptiveStatistics dStats = new DescriptiveStatistics(scores);

    summaryStatistics.put("min", dStats.getMin()); // Minimum
    summaryStatistics.put("q1", dStats.getPercentile(25)); // Lower Quartile (Q1)
    summaryStatistics.put("q2", dStats.getPercentile(50)); // Middle Quartile (Median - Q2)
    summaryStatistics.put("q3", dStats.getPercentile(75)); // High Quartile (Q3)
    summaryStatistics.put("max", dStats.getMax()); // Maximum

    summaryStatistics.put("mean", dStats.getMean()); // Mean
    summaryStatistics.put("sd", dStats.getStandardDeviation()); // Standard Deviation

    EmpiricalDistribution distribution = new EmpiricalDistribution(NUM_BINS);
    distribution.load(scores);
    List<SummaryStatistics> binStats = distribution.getBinStats();
    double[] upperBounds = distribution.getUpperBounds();

    // The first bin starts at the overall minimum; every later bin starts where the previous ended.
    Double lastUpperBound = upperBounds[0];
    bins.add(new Pair<Pair<Double, Double>, Long>(
            new Pair<Double, Double>(summaryStatistics.get("min"), lastUpperBound), binStats.get(0).getN()));
    for (int i = 1; i < binStats.size(); i++) {
        bins.add(new Pair<Pair<Double, Double>, Long>(new Pair<Double, Double>(lastUpperBound, upperBounds[i]),
                binStats.get(i).getN()));
        lastUpperBound = upperBounds[i];
    }

    // Only look for outliers when there is enough data and it is not constant.
    if (list.size() > 5 && dStats.getStandardDeviation() > 0) {
        double mean = dStats.getMean();
        double stDev = dStats.getStandardDeviation();
        NormalDistribution normalDistribution = new NormalDistribution(mean, stDev);

        double significanceLevel = .50 / list.size(); // Chauvenet-style cut-off, applied per tail
        Iterator<Integer> listIterator = list.iterator();
        while (listIterator.hasNext()) {
            int num = listIterator.next();
            double lowerTail = normalDistribution.cumulativeProbability(num);
            // Test whichever tail the value lies in. Checking only the lower tail would never
            // flag unusually HIGH scores, whose cumulative probability is close to 1.
            double tailProbability = Math.min(lowerTail, 1.0 - lowerTail);
            if (tailProbability < significanceLevel) {
                outliers.add(num);
                listIterator.remove();
            }
        }

        if (list.size() != dStats.getN()) { // If and only if outliers have been removed
            double[] significantData = intsToDoubles(list);
            dStats = new DescriptiveStatistics(significantData);

            // NOTE(review): quartiles and bins are intentionally left as computed from the full
            // data set, as in the original - confirm that this is the desired behaviour.
            summaryStatistics.put("min", dStats.getMin());
            summaryStatistics.put("max", dStats.getMax());
            summaryStatistics.put("mean", dStats.getMean());
            summaryStatistics.put("sd", dStats.getStandardDeviation());
        }
    }
}

From source file:weka.classifiers.timeseries.eval.TSEvaluation.java

/**
 * Derives the outlier threshold from the given error statistics.
 *
 * <p>The mean, standard deviation, median (50th percentile) and 85th percentile of
 * {@code MAPEstats} are first stored in the fields {@code mape}, {@code std}, {@code median} and
 * {@code upperPercentile}. When {@code readTreshsFromFile} is set, the {@code treshold} field is
 * returned as is. Otherwise the threshold is the weighted sum
 *
 * <pre>
 *   upperWeight * upperPercentile + stdWeight * std + mape + medianWeight * median
 * </pre>
 *
 * where the three weights are looked up from the band that {@code mape} falls in
 * (&lt;1, &lt;2, &lt;5, &lt;10, &lt;15, &lt;20, &lt;30, otherwise) and, within it, the band of
 * {@code std}. If {@code std} is NaN no band applies and 0 is returned.
 *
 * @param MAPEstats statistics to derive the threshold from
 * @return the outlier threshold
 * @throws Exception declared for callers; nothing in this method throws it explicitly
 */
public double CalcOutlierTreshold(DescriptiveStatistics MAPEstats) throws Exception {
    // The summary figures are cached in the fields regardless of which path is taken below.
    mape = MAPEstats.getMean();
    std = MAPEstats.getStandardDeviation();
    median = MAPEstats.getPercentile(50);
    upperPercentile = MAPEstats.getPercentile(85);

    if (readTreshsFromFile) {
        return treshold;
    }
    if (Double.isNaN(std)) {
        // No standard-deviation band matches a NaN, so the threshold keeps its initial value.
        return 0;
    }

    // Weights default to 1; each band below overrides only what differs.
    double upperWeight = 1;
    double stdWeight = 1;
    double medianWeight = 1;

    if (mape < 1) {
        stdWeight = (std < 5) ? 3 : (std < 10) ? 1.5 : 1;
    } else if (mape < 2) {
        stdWeight = (std < 5) ? 3 : (std < 10) ? 2.5 : 1;
    } else if (mape < 5) {
        if (std < 5) {
            upperWeight = 2;
            medianWeight = 1.25;
        } else if (std < 10) {
            medianWeight = 2.75;
        } else {
            upperWeight = 0.5;
        }
    } else if (mape < 10) {
        if (std < 5) {
            upperWeight = 1.5;
            medianWeight = 1.25;
        } else {
            // The 5..7.5 and 7.5..10 bands carry the same weights.
            upperWeight = 0.85;
            medianWeight = (std < 10) ? 2.5 : 2;
        }
    } else if (mape < 15) {
        if (std < 5) {
            upperWeight = 0.75;
            medianWeight = 2;
        } else if (std < 10) {
            upperWeight = 0.5;
            medianWeight = 1.65;
        } else if (std < 15) {
            upperWeight = 2;
            medianWeight = 2.5;
        } else if (std < 20) {
            upperWeight = 2;
            medianWeight = 2;
        } else {
            upperWeight = 0.5;
            medianWeight = 2;
        }
    } else if (mape < 20) {
        if (std < 10) {
            medianWeight = 2;
        } else if (std < 15) {
            medianWeight = 3;
        } else if (std < 20) {
            upperWeight = 0.5;
            medianWeight = 2;
        } else {
            medianWeight = 2;
        }
    } else if (mape < 30) {
        upperWeight = (std >= 10 && std < 20) ? 1.25 : 1;
        medianWeight = 2;
    } else {
        // Also reached when mape is NaN, since every comparison above is then false.
        upperWeight = (std < 20) ? 1 : 0.5;
    }

    return upperWeight * upperPercentile + stdWeight * std + mape + medianWeight * median;
}