Example usage for org.apache.commons.math.stat.descriptive DescriptiveStatistics getPercentile

List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics#getPercentile

Introduction

On this page you can find example usages of org.apache.commons.math.stat.descriptive.DescriptiveStatistics#getPercentile.

Prototype

public double getPercentile(double p) 

Source Link

Document

Returns an estimate for the pth percentile of the stored values.

Usage

From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java

/**
 * Sweeps the benchmark over increasing arrival rates until a trial reports a
 * {@link RuntimeException} (which the experiment uses to signal infinite queuing).
 * Two sweeps are performed: one with the maximum pool size equal to the core pool
 * size, and one with the maximum pool size set to {@code Integer.MAX_VALUE}.
 * For every rate the 50th, 95th and 99th percentile of the run times and of the
 * wait times are printed to standard output.
 */
public static void main(String[] args) throws InterruptedException, ExecutionException {
    OptionParser cli = new OptionParser();
    cli.accepts("t", "size of core thread pool").withRequiredArg().ofType(Integer.class);
    cli.accepts("b", "benchmark ID").withRequiredArg().ofType(Integer.class);
    cli.accepts("i", "number of benchmark iterations to run").withRequiredArg().ofType(Integer.class);
    cli.accepts("s", "bucket size in seconds").withRequiredArg().ofType(Integer.class);
    cli.accepts("d", " experiment duration in seconds").withRequiredArg().ofType(Integer.class);
    cli.accepts("r", "initial rate of task launches").withRequiredArg().ofType(Double.class);
    cli.accepts("f", "final rate of task launches").withRequiredArg().ofType(Double.class);
    cli.accepts("k", "step size of increased rates").withRequiredArg().ofType(Double.class);

    OptionSet parsed = cli.parse(args);

    // The pool size is the only purely local setting; it defaults to 4.
    int poolSize = parsed.has("t") ? (Integer) parsed.valueOf("t") : 4;

    // The remaining options override settings that are declared outside this method.
    if (parsed.has("b")) {
        benchmarkId = (Integer) parsed.valueOf("b");
    }
    if (parsed.has("i")) {
        benchmarkIterations = (Integer) parsed.valueOf("i");
    }
    if (parsed.has("s")) {
        bucketSizeS = (Integer) parsed.valueOf("s");
    }
    if (parsed.has("d")) {
        trialLengthS = (Integer) parsed.valueOf("d");
    }
    if (parsed.has("r")) {
        startRate = (Double) parsed.valueOf("r");
    }
    if (parsed.has("f")) {
        endRate = (Double) parsed.valueOf("f");
    }
    if (parsed.has("k")) {
        rateStep = (Double) parsed.valueOf("k");
    }

    int bucketSizeMs = bucketSizeS * 1000;
    int trialLengthMs = trialLengthS * 1000;

    // Two throw-away trials so the JIT has a chance to kick in before measuring.
    for (int warmUp = 0; warmUp < 2; warmUp++) {
        runExperiment(15.0, poolSize, poolSize, bucketSizeMs, trialLengthMs,
                new DescriptiveStatistics(), new DescriptiveStatistics());
    }

    // First sweep: max pool size equals the core pool size.
    sweepArrivalRates(poolSize, poolSize, bucketSizeMs, trialLengthMs);
    // Second sweep: max pool size is Integer.MAX_VALUE.
    sweepArrivalRates(poolSize, Integer.MAX_VALUE, bucketSizeMs, trialLengthMs);
}

/**
 * Runs one trial per arrival rate from {@code startRate} to {@code endRate} in steps
 * of {@code rateStep}, printing run-time and wait-time percentiles for each rate.
 * The sweep stops at the first trial that throws a {@link RuntimeException}; the
 * exception is printed to standard output.
 */
private static void sweepArrivalRates(int corePoolSize, int maxPoolSize, int bucketSizeMs,
        int trialLengthMs) {
    double rate = startRate;
    while (rate <= endRate) {
        try {
            DescriptiveStatistics runTimes = new DescriptiveStatistics();
            DescriptiveStatistics waitTimes = new DescriptiveStatistics();
            runExperiment(rate, corePoolSize, maxPoolSize, bucketSizeMs, trialLengthMs, runTimes,
                    waitTimes);

            double runMedian = runTimes.getPercentile(50.0);
            double runP95 = runTimes.getPercentile(95);
            double runP99 = runTimes.getPercentile(99);
            System.out.println(rate + " run " + runMedian + " " + runP95 + " " + runP99);

            double waitMedian = waitTimes.getPercentile(50.0);
            double waitP95 = waitTimes.getPercentile(95);
            double waitP99 = waitTimes.getPercentile(99);
            System.out.println(rate + " wait " + waitMedian + " " + waitP95 + " " + waitP99);
        } catch (RuntimeException e) {
            System.out.println(e);
            return;
        }
        rate = rate + rateStep;
    }
}

From source file:com.mozilla.socorro.RawDumpSizeScan.java

/**
 * Scans the crash-report table between two dates and prints size statistics
 * (min, max, mean and quartiles) of the raw bytes of every row, plus the row ids of
 * all entries larger than 20 MiB.
 *
 * @param args {@code args[0]} is the start date and {@code args[1]} the end date, both
 *             formatted as {@code yyyyMMdd}; a blank value leaves the corresponding
 *             calendar at its {@code Calendar.getInstance()} default
 * @throws ParseException if a non-blank date argument cannot be parsed
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) throws ParseException {
    if (args.length < 2) {
        throw new IllegalArgumentException("Expected two arguments: <startDate yyyyMMdd> <endDate yyyyMMdd>");
    }
    String startDateStr = args[0];
    String endDateStr = args[1];

    // Rows larger than this many bytes (20 MiB) are reported individually.
    final int largeEntryThresholdBytes = 20 * 1024 * 1024;

    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();

    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");

    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }

    DescriptiveStatistics stats = new DescriptiveStatistics();
    long numNullRawBytes = 0L;
    HTable table = null;
    Map<String, Integer> rowValueSizeMap = new HashMap<String, Integer>();
    try {
        table = new HTable(TABLE_NAME_CRASH_REPORTS);
        Scan[] scans = generateScans(startCal, endCal);
        for (Scan s : scans) {
            ResultScanner rs = table.getScanner(s);
            try {
                Iterator<Result> iter = rs.iterator();
                while (iter.hasNext()) {
                    Result r = iter.next();
                    ImmutableBytesWritable rawBytes = r.getBytes();
                    if (rawBytes == null) {
                        numNullRawBytes++;
                        continue;
                    }

                    int length = rawBytes.getLength();
                    if (length > largeEntryThresholdBytes) {
                        rowValueSizeMap.put(new String(r.getRow()), length);
                    }
                    stats.addValue(length);

                    // Report progress only right after a value was recorded; otherwise a run
                    // of null rows would re-print the same report on every iteration.
                    if (stats.getN() % 10000 == 0) {
                        System.out.println("Processed " + stats.getN());
                        printSizeSummary(stats);
                        System.out.println("Number of large entries: " + rowValueSizeMap.size());
                    }
                }
            } finally {
                // Release the scanner even when the scan fails part-way through.
                rs.close();
            }
        }

        System.out.println("Finished Processing!");
        printSizeSummary(stats);

        for (Map.Entry<String, Integer> entry : rowValueSizeMap.entrySet()) {
            System.out.println(String.format("RowId: %s => Length: %d", entry.getKey(), entry.getValue()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/** Prints the min/max/mean line followed by the quartile line for the collected sizes. */
private static void printSizeSummary(DescriptiveStatistics stats) {
    System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(),
            stats.getMean()));
    System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
            stats.getPercentile(25.0d), stats.getPercentile(50.0d), stats.getPercentile(75.0d)));
}

From source file:com.linkedin.pinot.perf.QueryRunner.java

/**
 * Logs the textual summary of {@code stats} followed by a fixed set of percentiles
 * (10, 25, 50, 90, 95, 99 and 99.9), one log line each.
 */
private static void printStats(DescriptiveStatistics stats) {
    final String[] messages = {
        "10th percentile: {}ms",
        "25th percentile: {}ms",
        "50th percentile: {}ms",
        "90th percentile: {}ms",
        "95th percentile: {}ms",
        "99th percentile: {}ms",
        "99.9th percentile: {}ms",
    };
    final double[] ranks = { 10.0, 25.0, 50.0, 90.0, 95.0, 99.0, 99.9 };

    LOGGER.info(stats.toString());
    for (int k = 0; k < ranks.length; k++) {
        LOGGER.info(messages[k], stats.getPercentile(ranks[k]));
    }
}

From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java

/**
 * Run an experiment which launches tasks at {@code arrivalRate} for {@code durationMs}
 * milliseconds and waits for all tasks to finish. Nothing is returned; instead the
 * run times and wait times of all tasks in complete buckets are appended to the
 * caller-supplied {@code runTimes} and {@code waitTimes}.
 *
 * When {@code maxPoolSize} is {@code Integer.MAX_VALUE} tasks run in a cached thread
 * pool, so a new thread is used whenever no idle one is available (time sharing).
 * Otherwise tasks run in a {@link ThreadPoolExecutor} with {@code corePoolSize} core
 * threads backed by an unbounded queue. Per the JDK documentation such an executor
 * only adds threads beyond the core size when the queue refuses a task, which an
 * unbounded queue never does, so in practice this is a fixed-size pool.
 *
 * The derivative of task finishes is tracked by bucketing tasks at the granularity
 * {@code bucketSize}. If it is detected that task finishes are increasing in an
 * unbounded fashion (i.e. infinite queuing is occurring) a {@link RuntimeException}
 * is thrown.
 *
 * @param arrivalRate rate passed to {@code generateInterarrivalDelay} to draw delays
 * @param corePoolSize core size of the thread pool
 * @param maxPoolSize maximum pool size; {@code Integer.MAX_VALUE} selects a cached pool
 * @param bucketSize bucketing granularity handed to each {@code BenchmarkRunnable}
 * @param durationMs how long to keep launching tasks, in milliseconds
 * @param runTimes receives the run time of every task in a complete bucket
 * @param waitTimes receives the wait time of every task in a complete bucket
 * @throws IllegalStateException if fewer than two buckets were recorded, so the
 *         partial first and last buckets cannot be dropped
 * @throws RuntimeException if the median run time of the last complete bucket is more
 *         than five times that of the first complete bucket
 */
public static void runExperiment(double arrivalRate, int corePoolSize, int maxPoolSize, long bucketSize,
        long durationMs, DescriptiveStatistics runTimes, DescriptiveStatistics waitTimes) {
    long startTime = System.currentTimeMillis();
    long keepAliveTime = 10;
    Random r = new Random();

    // Build only the executor that is actually used.
    ExecutorService threadPool;
    if (maxPoolSize == Integer.MAX_VALUE) {
        threadPool = Executors.newCachedThreadPool();
    } else {
        BlockingQueue<Runnable> runQueue = new LinkedBlockingQueue<Runnable>();
        threadPool = new ThreadPoolExecutor(corePoolSize, maxPoolSize, keepAliveTime,
                TimeUnit.MILLISECONDS, runQueue);
    }

    // run times indexed by bucketing interval
    HashMap<Long, List<Long>> bucketedRunTimes = new HashMap<Long, List<Long>>();
    // wait times indexed by bucketing interval
    HashMap<Long, List<Long>> bucketedWaitTimes = new HashMap<Long, List<Long>>();

    /*
     * This is a little tricky.
     *
     * We want to generate inter-arrival delays according to the arrival rate specified.
     * The simplest option would be to generate an arrival delay and then sleep() for it
     * before launching each task. This has an issue, however: sleep() might wait
     * several ms longer than we ask it to. When task arrival rates get really fast,
     * i.e. one task every 10 ms, sleeping an additional few ms will mean we launch
     * tasks at a much lower rate than requested.
     *
     * Instead, we keep track of task launches in a way that does not depend on how long
     * sleep() actually takes. We still might have tasks launch slightly after their
     * scheduled launch time, but we will not systematically "fall behind" due to
     * compounding time lost during sleep() calls.
     */
    long currTime = startTime;
    while (true) {
        long delay = (long) (generateInterarrivalDelay(r, arrivalRate) * 1000);

        // When should the next task launch, based on when the last task was scheduled
        // to launch.
        long nextTime = currTime + delay;

        // Diff gives how long we should wait for the next scheduled task. The difference
        // may be negative if our last sleep() lasted too long relative to the inter-arrival
        // delay based on the last scheduled launch, so we round up to 0 in that case.
        long diff = Math.max(0, nextTime - System.currentTimeMillis());
        currTime = nextTime;
        if (diff > 0) {
            try {
                Thread.sleep(diff);
            } catch (InterruptedException e) {
                System.err.println("Unexpected interruption!");
                System.exit(1);
            }
        }
        threadPool.submit((new BenchmarkRunnable(bucketedRunTimes, bucketedWaitTimes, bucketSize)));
        if (System.currentTimeMillis() > startTime + durationMs) {
            break;
        }
    }
    threadPool.shutdown();
    try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e1) {
        System.err.println("Unexpected interruption!");
        System.exit(1);
    }
    List<Long> times = new ArrayList<Long>(bucketedRunTimes.keySet());
    Collections.sort(times);
    HashMap<Long, DescriptiveStatistics> bucketStats = new HashMap<Long, DescriptiveStatistics>();

    // Dropping the first and last bucket needs at least two of them; fail with a clear
    // message instead of an index error from the removals below.
    if (times.size() < 2) {
        throw new IllegalStateException("Trial too short: only " + times.size()
                + " bucket(s) recorded, at least 2 are needed");
    }

    // Remove first and last buckets since they will not be completely full to do
    // discretization.
    times.remove(0);
    times.remove(times.size() - 1);

    for (Long time : times) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        List<Long> list = bucketedRunTimes.get(time);
        for (Long l : list) {
            stats.addValue(l);
            runTimes.addValue(l);
        }
        bucketStats.put(time, stats);

        List<Long> waitList = bucketedWaitTimes.get(time);
        for (Long l : waitList) {
            waitTimes.addValue(l);
        }
    }
    int size = bucketStats.size();
    if (size >= 2) {
        DescriptiveStatistics first = bucketStats.get(times.get(0));
        DescriptiveStatistics last = bucketStats.get(times.get(times.size() - 1));
        double increase = last.getPercentile(50) / first.getPercentile(50);
        // A simple heuristic: if the median runtime went up by a factor of five from the
        // first to the last complete bucket, we assume we are seeing unbounded growth.
        if (increase > 5.0) {
            throw new RuntimeException(
                    "Queue not in steady state: " + last.getMean() + " vs " + first.getMean());
        }
    }
}

From source file:net.shipilev.fjptrace.util.PairedList.java

/**
 * Returns a new list holding only the pairs whose second component lies between the
 * {@code cutoff} and {@code 100 - cutoff} percentiles (both inclusive) of all second
 * components currently stored.
 *
 * @param cutoff percentile to trim from each end
 * @return the pairs that fall inside the percentile band
 */
public PairedList filter(double cutoff) {
    DescriptiveStatistics distribution = new DescriptiveStatistics();
    for (int pos = 0; pos < index; pos++) {
        distribution.addValue(k2[pos]);
    }

    final double floor = distribution.getPercentile(cutoff);
    final double ceiling = distribution.getPercentile(100 - cutoff);

    PairedList kept = new PairedList();
    for (int pos = 0; pos < index; pos++) {
        boolean insideBand = floor <= k2[pos] && k2[pos] <= ceiling;
        if (!insideBand) {
            continue;
        }
        kept.add(k1[pos], k2[pos]);
    }
    return kept;
}

From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java

/**
 * Computes the median of the given values.
 *
 * @param values the values to summarise
 * @return the 50th percentile of {@code values} as estimated by
 *         {@link DescriptiveStatistics#getPercentile(double)}
 */
public double median(final List<Integer> values) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for (final Integer value : values) {
        stats.addValue(value);
    }
    // getPercentile takes a percentage in (0, 100], so the median is 50, not 0.5.
    return stats.getPercentile(50);
}

From source file:fantail.algorithms.BinaryART.java

/**
 * Computes the median of one attribute over all instances in {@code data}.
 *
 * @param data the instances to read from
 * @param attIndex index of the attribute whose values are collected
 * @return the 50th percentile of the attribute values
 */
private double getMedian(Instances data, int attIndex) throws Exception {
    // The former "if (false)" delegation to getMedian2 was unreachable and has been removed.
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = (Instance) data.instance(i);
        stats.addValue(inst.value(attIndex));
    }
    return stats.getPercentile(50);
}

From source file:fantail.algorithms.RankingWithBinaryPCT.java

/**
 * Collects the value of attribute {@code attIndex} from every instance in
 * {@code data} and returns the 50th percentile of those values.
 */
private double getMedian(Instances data, int attIndex) throws Exception {
    final DescriptiveStatistics columnStats = new DescriptiveStatistics();
    int row = 0;
    while (row < data.numInstances()) {
        Instance current = (Instance) data.instance(row);
        columnStats.addValue(current.value(attIndex));
        row++;
    }
    return columnStats.getPercentile(50);
}

From source file:de.mpicbg.knime.hcs.base.nodes.preproc.OutlierRemoval.java

/**
 * Splits the input table into groups by the configured grouping column, computes a
 * lower and an upper bound per selected parameter for every group, and routes each
 * row of the group either to the "keep" table or to the "discard" table.
 *
 * Bounds come from percentiles ("Boxplot" method) or from mean +/- factor * standard
 * deviation (any other method). Depending on {@code rule}, a row is discarded when
 * it is an outlier in all parameters, or when it is an outlier in at least one.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec used to create the output containers and to report progress
 * @return a two-element array: the kept rows, then the discarded rows
 */
@Override
protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec) throws Exception {

    BufferedDataTable input = inData[0];
    DataTableSpec inputSpec = input.getDataTableSpec();

    // Collect the selected parameters, keeping only columns compatible with double values.
    List<Attribute> parameter = new ArrayList<Attribute>();
    for (String item : parameterNames.getIncludeList()) {
        Attribute attribute = new InputTableAttribute(item, input);
        if (attribute.getType().isCompatible(DoubleValue.class)) {
            parameter.add(attribute);
        } else {
            logger.warn("The parameter '" + attribute.getName()
                    + "' will not be considered for outlier removal, since it is not compatible to double.");
        }
    }

    // Get the groups defined by the nominal column.
    Attribute groupingAttribute = new InputTableAttribute(this.groupingColumn.getStringValue(), input);
    Map<Object, List<DataRow>> subsets = AttributeUtils.splitRowsGeneric(input, groupingAttribute);

    // Initialize the two output containers and the progress counters (s of S groups).
    BufferedDataContainer keepContainer = exec.createDataContainer(inputSpec);
    BufferedDataContainer discartContainer = exec.createDataContainer(inputSpec);
    int S = subsets.size();
    int s = 1;

    // Outlier analysis for each subset
    for (Object key : subsets.keySet()) {

        // Get the subset having all constraints in common
        List<DataRow> rowSubset = subsets.get(key);

        // Get the valid values
        RealMatrix data = extractMatrix(rowSubset, parameter);

        // N = number of matrix columns, M = number of matrix rows.
        int N = data.getColumnDimension();
        int M = data.getRowDimension();
        if (M == 0) {
            // No usable values: the rows of this group go to neither output container.
            logger.warn("The group '" + key + "' has no valid values and will be removed entirely'");
        } else {

            // Determine upper and lower outlier bounds
            double[] lowerBound = new double[N];
            double[] upperBound = new double[N];
            if (method.getStringValue().equals("Boxplot")) {
                for (int c = 0; c < N; ++c) {
                    RealVector vect = data.getColumnVector(c);
                    DescriptiveStatistics stats = new DescriptiveStatistics();
                    for (double value : vect.getData()) {
                        stats.addValue(value);
                    }
                    double lowerQuantile = stats.getPercentile(25);
                    // NOTE(review): a conventional boxplot uses the 75th percentile as the
                    // upper quartile; confirm that 85 is intentional.
                    double upperQuantile = stats.getPercentile(85);
                    // Whisker length is the quantile spread scaled by the configured factor.
                    double whisker = factor.getDoubleValue() * Math.abs(lowerQuantile - upperQuantile);
                    lowerBound[c] = lowerQuantile - whisker;
                    upperBound[c] = upperQuantile + whisker;
                }
            } else {
                // Any other method: bounds are mean -/+ factor * standard deviation.
                for (int c = 0; c < N; ++c) {
                    RealVector vect = data.getColumnVector(c);
                    double mean = StatUtils.mean(vect.getData());
                    double sd = Math.sqrt(StatUtils.variance(vect.getData()));
                    lowerBound[c] = mean - factor.getDoubleValue() * sd;
                    upperBound[c] = mean + factor.getDoubleValue() * sd;
                }
            }

            // Remove the outliers
            if (rule.getBooleanValue()) { // The row is only discarded if it is an outlier in all parameters.
                for (DataRow row : rowSubset) {
                    // c counts leading out-of-bounds values and also indexes the bounds arrays.
                    int c = 0;
                    for (Attribute column : parameter) {

                        DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex());

                        // a missing value will be treated as data point inside the bounds
                        // NOTE(review): c is not advanced here, so c can no longer reach N (the
                        // row is kept), and later columns are compared against the bounds of
                        // an earlier index. Confirm this is intended.
                        if (valueCell.isMissing()) {
                            continue;
                        }

                        Double value = ((DoubleValue) valueCell).getDoubleValue();
                        if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) {
                            // One in-bounds value is enough to keep the row.
                            break;
                        } else {
                            c++;
                        }
                    }
                    // c == N means no in-bounds value was found before the loop ended.
                    if (c != N) {
                        keepContainer.addRowToTable(row);
                    } else {
                        discartContainer.addRowToTable(row);
                    }
                }
            } else { // The row is discarded if it has an outlier for at least one parameter.
                for (DataRow row : rowSubset) {
                    // c counts leading in-bounds (or missing) values and indexes the bounds arrays.
                    int c = 0;
                    for (Attribute column : parameter) {

                        DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex());

                        // a missing value will be treated as data point inside the bounds
                        if (valueCell.isMissing()) {
                            c++;
                            continue;
                        }

                        Double value = ((DoubleValue) valueCell).getDoubleValue();
                        if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) {
                            c++;
                        } else {
                            // First out-of-bounds value: stop, the row will be discarded.
                            break;
                        }
                    }
                    // c == N means every parameter was in bounds or missing.
                    if (c == N) {
                        keepContainer.addRowToTable(row);
                    } else {
                        discartContainer.addRowToTable(row);
                    }
                }
            }
        }

        BufTableUtils.updateProgress(exec, s++, S);

    }

    keepContainer.close();
    discartContainer.close();
    return new BufferedDataTable[] { keepContainer.getTable(), discartContainer.getTable() };
}

From source file:com.joliciel.jochre.graphics.JochreImageImpl.java

/**
 * Returns the cached row height, computing it on first use. Despite the name, the
 * value is the 50th percentile of the x-heights of all rows in all paragraphs.
 * A stored value of 0 is treated as "not yet computed".
 */
public double getAverageRowHeight() {
    if (averageRowHeight != 0) {
        return averageRowHeight;
    }

    DescriptiveStatistics xHeightStats = new DescriptiveStatistics();
    for (Paragraph paragraph : this.getParagraphs()) {
        for (RowOfShapes row : paragraph.getRows()) {
            int xHeight = row.getXHeight();
            xHeightStats.addValue(xHeight);
        }
    }
    averageRowHeight = xHeightStats.getPercentile(50);
    LOG.debug("averageRowHeight: " + averageRowHeight);
    return averageRowHeight;
}