List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics.getPercentile
public double getPercentile(double p)
From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java
/** * Run the benchmark at increasingly high arrival rates until infinite queuing is * detected. This performs two trials, one where tasks are launched in a fixed-sized * pool and the other where tasks are not queued and always launched in parallel. * The statistics for runtime vs. arrival rate are printed out for each of the two * trials. //from w w w. jav a 2s.c o m */ public static void main(String[] args) throws InterruptedException, ExecutionException { OptionParser parser = new OptionParser(); parser.accepts("t", "size of core thread pool").withRequiredArg().ofType(Integer.class); parser.accepts("b", "benchmark ID").withRequiredArg().ofType(Integer.class); parser.accepts("i", "number of benchmark iterations to run").withRequiredArg().ofType(Integer.class); parser.accepts("s", "bucket size in seconds").withRequiredArg().ofType(Integer.class); parser.accepts("d", " experiment duration in seconds").withRequiredArg().ofType(Integer.class); parser.accepts("r", "initial rate of task launches").withRequiredArg().ofType(Double.class); parser.accepts("f", "final rate of task launches").withRequiredArg().ofType(Double.class); parser.accepts("k", "step size of increased rates").withRequiredArg().ofType(Double.class); OptionSet options = parser.parse(args); int coreThreadPoolSize = 4; if (options.has("t")) { coreThreadPoolSize = (Integer) options.valueOf("t"); } if (options.has("b")) { benchmarkId = (Integer) options.valueOf("b"); } if (options.has("i")) { benchmarkIterations = (Integer) options.valueOf("i"); } if (options.has("s")) { bucketSizeS = (Integer) options.valueOf("s"); } if (options.has("d")) { trialLengthS = (Integer) options.valueOf("d"); } if (options.has("r")) { startRate = (Double) options.valueOf("r"); } if (options.has("f")) { endRate = (Double) options.valueOf("f"); } if (options.has("k")) { rateStep = (Double) options.valueOf("k"); } // Run the benchmark a few times to let JIT kick in int bucketSizeMs = bucketSizeS * 1000; int trialLengthMs = 
trialLengthS * 1000; runExperiment(15.0, coreThreadPoolSize, coreThreadPoolSize, bucketSizeMs, trialLengthMs, new DescriptiveStatistics(), new DescriptiveStatistics()); runExperiment(15.0, coreThreadPoolSize, coreThreadPoolSize, bucketSizeMs, trialLengthMs, new DescriptiveStatistics(), new DescriptiveStatistics()); for (double i = startRate; i <= endRate; i = i + rateStep) { try { DescriptiveStatistics runTimes = new DescriptiveStatistics(); DescriptiveStatistics waitTimes = new DescriptiveStatistics(); runExperiment(i, coreThreadPoolSize, coreThreadPoolSize, bucketSizeMs, trialLengthMs, runTimes, waitTimes); System.out.println(i + " run " + runTimes.getPercentile(50.0) + " " + runTimes.getPercentile(95) + " " + runTimes.getPercentile(99)); System.out.println(i + " wait " + waitTimes.getPercentile(50.0) + " " + waitTimes.getPercentile(95) + " " + waitTimes.getPercentile(99)); } catch (RuntimeException e) { System.out.println(e); break; } } for (double i = startRate; i <= endRate; i = i + rateStep) { try { DescriptiveStatistics runTimes = new DescriptiveStatistics(); DescriptiveStatistics waitTimes = new DescriptiveStatistics(); runExperiment(i, coreThreadPoolSize, Integer.MAX_VALUE, bucketSizeMs, trialLengthMs, runTimes, waitTimes); System.out.println(i + " run " + runTimes.getPercentile(50.0) + " " + runTimes.getPercentile(95) + " " + runTimes.getPercentile(99)); System.out.println(i + " wait " + waitTimes.getPercentile(50.0) + " " + waitTimes.getPercentile(95) + " " + waitTimes.getPercentile(99)); } catch (RuntimeException e) { System.out.println(e); break; } } }
From source file:com.mozilla.socorro.RawDumpSizeScan.java
public static void main(String[] args) throws ParseException { String startDateStr = args[0]; String endDateStr = args[1];// ww w.ja v a 2s . co m // Set both start/end time and start/stop row Calendar startCal = Calendar.getInstance(); Calendar endCal = Calendar.getInstance(); SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); if (!StringUtils.isBlank(startDateStr)) { startCal.setTime(sdf.parse(startDateStr)); } if (!StringUtils.isBlank(endDateStr)) { endCal.setTime(sdf.parse(endDateStr)); } DescriptiveStatistics stats = new DescriptiveStatistics(); long numNullRawBytes = 0L; HTable table = null; Map<String, Integer> rowValueSizeMap = new HashMap<String, Integer>(); try { table = new HTable(TABLE_NAME_CRASH_REPORTS); Scan[] scans = generateScans(startCal, endCal); for (Scan s : scans) { ResultScanner rs = table.getScanner(s); Iterator<Result> iter = rs.iterator(); while (iter.hasNext()) { Result r = iter.next(); ImmutableBytesWritable rawBytes = r.getBytes(); //length = r.getValue(RAW_DATA_BYTES, DUMP_BYTES); if (rawBytes != null) { int length = rawBytes.getLength(); if (length > 20971520) { rowValueSizeMap.put(new String(r.getRow()), length); } stats.addValue(length); } else { numNullRawBytes++; } if (stats.getN() % 10000 == 0) { System.out.println("Processed " + stats.getN()); System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(), stats.getMean())); System.out.println( String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f", stats.getPercentile(25.0d), stats.getPercentile(50.0d), stats.getPercentile(75.0d))); System.out.println("Number of large entries: " + rowValueSizeMap.size()); } } rs.close(); } System.out.println("Finished Processing!"); System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(), stats.getMean())); System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f", stats.getPercentile(25.0d), 
stats.getPercentile(50.0d), stats.getPercentile(75.0d))); for (Map.Entry<String, Integer> entry : rowValueSizeMap.entrySet()) { System.out.println(String.format("RowId: %s => Length: %d", entry.getKey(), entry.getValue())); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (table != null) { try { table.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } }
From source file:com.linkedin.pinot.perf.QueryRunner.java
/**
 * Logs the textual summary of {@code stats} followed by a fixed set of percentiles
 * (10, 25, 50, 90, 95, 99 and 99.9), one log line each.
 */
private static void printStats(DescriptiveStatistics stats) {
    LOGGER.info(stats.toString());

    // Message templates and the percentile each one reports, kept in matching order.
    final String[] templates = {
        "10th percentile: {}ms",
        "25th percentile: {}ms",
        "50th percentile: {}ms",
        "90th percentile: {}ms",
        "95th percentile: {}ms",
        "99th percentile: {}ms",
        "99.9th percentile: {}ms",
    };
    final double[] percentiles = { 10.0, 25.0, 50.0, 90.0, 95.0, 99.0, 99.9 };

    for (int idx = 0; idx < percentiles.length; idx++) {
        LOGGER.info(templates[idx], stats.getPercentile(percentiles[idx]));
    }
}
From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java
/** * Run an experiment which launches tasks at {@code arrivalRate} for {@code durationMs} * seconds and waits for all tasks to finish. Return a {@link DescriptiveStatistics} * object which contains stats about the distribution of task finish times. Tasks * are executed in a thread pool which contains at least {@code corePoolSize} threads * and grows up to {@code maxPoolSize} threads (growing whenever a new task arrives * and all existing threads are used). // www . j av a 2 s .c om * * Setting {@code maxPoolSize} to a very large number enacts time sharing, while * setting it equal to {@code corePoolSize} creates a fixed size task pool. * * The derivative of task finishes is tracked by bucketing tasks at the granularity * {@code bucketSize}. If it is detected that task finishes are increasing in an * unbounded fashion (i.e. infinite queuing is occuring) a {@link RuntimeException} * is thrown. */ public static void runExperiment(double arrivalRate, int corePoolSize, int maxPoolSize, long bucketSize, long durationMs, DescriptiveStatistics runTimes, DescriptiveStatistics waitTimes) { long startTime = System.currentTimeMillis(); long keepAliveTime = 10; Random r = new Random(); BlockingQueue<Runnable> runQueue = new LinkedBlockingQueue<Runnable>(); ExecutorService threadPool = new ThreadPoolExecutor(corePoolSize, maxPoolSize, keepAliveTime, TimeUnit.MILLISECONDS, runQueue); if (maxPoolSize == Integer.MAX_VALUE) { threadPool = Executors.newCachedThreadPool(); } // run times indexed by bucketing interval HashMap<Long, List<Long>> bucketedRunTimes = new HashMap<Long, List<Long>>(); // wait times indexed by bucketing interval HashMap<Long, List<Long>> bucketedWaitTimes = new HashMap<Long, List<Long>>(); /* * This is a little tricky. * * We want to generate inter-arrival delays according to the arrival rate specified. * The simplest option would be to generate an arrival delay and then sleep() for it * before launching each task. 
This has in issue, however: sleep() might wait * several ms longer than we ask it to. When task arrival rates get really fast, * i.e. one task every 10 ms, sleeping an additional few ms will mean we launch * tasks at a much lower rate than requested. * * Instead, we keep track of task launches in a way that does not depend on how long * sleep() actually takes. We still might have tasks launch slightly after their * scheduled launch time, but we will not systematically "fall behind" due to * compounding time lost during sleep()'s; */ long currTime = startTime; while (true) { long delay = (long) (generateInterarrivalDelay(r, arrivalRate) * 1000); // When should the next task launch, based on when the last task was scheduled // to launch. long nextTime = currTime + delay; // Diff gives how long we should wait for the next scheduled task. The difference // may be negative if our last sleep() lasted too long relative to the inter-arrival // delay based on the last scheduled launch, so we round up to 0 in that case. long diff = Math.max(0, nextTime - System.currentTimeMillis()); currTime = nextTime; if (diff > 0) { try { Thread.sleep(diff); } catch (InterruptedException e) { System.err.println("Unexpected interruption!"); System.exit(1); } } threadPool.submit((new BenchmarkRunnable(bucketedRunTimes, bucketedWaitTimes, bucketSize))); if (System.currentTimeMillis() > startTime + durationMs) { break; } } threadPool.shutdown(); try { threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e1) { System.err.println("Unexpected interruption!"); System.exit(1); } List<Long> times = new ArrayList<Long>(bucketedRunTimes.keySet()); Collections.sort(times); HashMap<Long, DescriptiveStatistics> bucketStats = new HashMap<Long, DescriptiveStatistics>(); // Remove first and last buckets since they will not be completely full to do // discretization. 
times.remove(0); times.remove(times.size() - 1); for (Long time : times) { DescriptiveStatistics stats = new DescriptiveStatistics(); List<Long> list = bucketedRunTimes.get(time); for (Long l : list) { stats.addValue(l); runTimes.addValue(l); } bucketStats.put(time, stats); List<Long> waitList = bucketedWaitTimes.get(time); for (Long l : waitList) { waitTimes.addValue(l); } } int size = bucketStats.size(); if (size >= 2) { DescriptiveStatistics first = bucketStats.get(times.get(0)); DescriptiveStatistics last = bucketStats.get(times.get(times.size() - 1)); double increase = last.getPercentile(50) / first.getPercentile(50); // A simple heuristic, if the median runtime went up by five from the first to // last complete bucket, we assume we are seeing unbounded growth if (increase > 5.0) { throw new RuntimeException( "Queue not in steady state: " + last.getMean() + " vs " + first.getMean()); } } }
From source file:net.shipilev.fjptrace.util.PairedList.java
public PairedList filter(double cutoff) { DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < index; i++) { stats.addValue(k2[i]);//from w w w.j av a 2 s .co m } double lower = stats.getPercentile(cutoff); double upper = stats.getPercentile(100 - cutoff); PairedList result = new PairedList(); for (int i = 0; i < index; i++) { if (lower <= k2[i] && k2[i] <= upper) { result.add(k1[i], k2[i]); } } return result; }
From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java
/**
 * Computes the median of the given values.
 *
 * @param values the values to summarize
 * @return the 50th percentile of {@code values} as estimated by
 *         {@code DescriptiveStatistics}; per the Commons Math documentation this is
 *         {@code NaN} when no values were added
 */
public double median(final List<Integer> values) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for (final Integer value : values) {
        stats.addValue(value);
    }
    // getPercentile expects p on the 0-100 scale: the median is 50, not 0.5 (which would
    // request the 0.5th percentile, i.e. roughly the minimum).
    return stats.getPercentile(50);
}
From source file:fantail.algorithms.BinaryART.java
private double getMedian(Instances data, int attIndex) throws Exception { if (false) {/*from w w w.j a va 2s . co m*/ return getMedian2(data, attIndex); // added 07-july 2013; actually they are the same // removed 17/07/2013 } DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); stats.addValue(inst.value(attIndex)); } double median = stats.getPercentile(50); return median; }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
private double getMedian(Instances data, int attIndex) throws Exception { DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); stats.addValue(inst.value(attIndex)); }//from w w w . ja va 2 s.c o m double median = stats.getPercentile(50); return median; }
From source file:de.mpicbg.knime.hcs.base.nodes.preproc.OutlierRemoval.java
@Override protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec) throws Exception { BufferedDataTable input = inData[0]; DataTableSpec inputSpec = input.getDataTableSpec(); // Get the parameter and make sure there all double value columns List<Attribute> parameter = new ArrayList<Attribute>(); for (String item : parameterNames.getIncludeList()) { Attribute attribute = new InputTableAttribute(item, input); if (attribute.getType().isCompatible(DoubleValue.class)) { parameter.add(attribute);/*from ww w . ja v a2s . co m*/ } else { logger.warn("The parameter '" + attribute.getName() + "' will not be considered for outlier removal, since it is not compatible to double."); } } // Get the groups defined by the nominal column. Attribute groupingAttribute = new InputTableAttribute(this.groupingColumn.getStringValue(), input); Map<Object, List<DataRow>> subsets = AttributeUtils.splitRowsGeneric(input, groupingAttribute); // Initialize BufferedDataContainer keepContainer = exec.createDataContainer(inputSpec); BufferedDataContainer discartContainer = exec.createDataContainer(inputSpec); int S = subsets.size(); int s = 1; // Outlier analysis for each subset for (Object key : subsets.keySet()) { // Get the subset having all constraints in common List<DataRow> rowSubset = subsets.get(key); // Get the valid values RealMatrix data = extractMatrix(rowSubset, parameter); int N = data.getColumnDimension(); int M = data.getRowDimension(); if (M == 0) { logger.warn("The group '" + key + "' has no valid values and will be removed entirely'"); } else { // Determine upper and lower outlier bounds double[] lowerBound = new double[N]; double[] upperBound = new double[N]; if (method.getStringValue().equals("Boxplot")) { for (int c = 0; c < N; ++c) { RealVector vect = data.getColumnVector(c); DescriptiveStatistics stats = new DescriptiveStatistics(); for (double value : vect.getData()) { stats.addValue(value); } double lowerQuantile = stats.getPercentile(25); 
double upperQuantile = stats.getPercentile(85); double whisker = factor.getDoubleValue() * Math.abs(lowerQuantile - upperQuantile); lowerBound[c] = lowerQuantile - whisker; upperBound[c] = upperQuantile + whisker; } } else { for (int c = 0; c < N; ++c) { RealVector vect = data.getColumnVector(c); double mean = StatUtils.mean(vect.getData()); double sd = Math.sqrt(StatUtils.variance(vect.getData())); lowerBound[c] = mean - factor.getDoubleValue() * sd; upperBound[c] = mean + factor.getDoubleValue() * sd; } } // Remove The outlier if (rule.getBooleanValue()) { // The row is only discarted if the row is an outlier in all parameter. for (DataRow row : rowSubset) { int c = 0; for (Attribute column : parameter) { DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex()); // a missing value will be treated as data point inside the bounds if (valueCell.isMissing()) { continue; } Double value = ((DoubleValue) valueCell).getDoubleValue(); if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) { break; } else { c++; } } if (c != N) { keepContainer.addRowToTable(row); } else { discartContainer.addRowToTable(row); } } } else { // The row is discarted if it has a outlier for at least one parameter. for (DataRow row : rowSubset) { int c = 0; for (Attribute column : parameter) { DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex()); // a missing value will be treated as data point inside the bounds if (valueCell.isMissing()) { c++; continue; } Double value = ((DoubleValue) valueCell).getDoubleValue(); if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) { c++; } else { break; } } if (c == N) { keepContainer.addRowToTable(row); } else { discartContainer.addRowToTable(row); } } } } BufTableUtils.updateProgress(exec, s++, S); } keepContainer.close(); discartContainer.close(); return new BufferedDataTable[] { keepContainer.getTable(), discartContainer.getTable() }; }
From source file:com.joliciel.jochre.graphics.JochreImageImpl.java
/**
 * Returns the typical row height of this image, computed lazily as the median x-height
 * over all rows of all paragraphs and cached in {@code averageRowHeight}. A cached value
 * of {@code 0} is treated as "not yet computed".
 */
public double getAverageRowHeight() {
    if (averageRowHeight != 0) {
        return averageRowHeight;
    }

    DescriptiveStatistics xHeights = new DescriptiveStatistics();
    for (Paragraph paragraph : this.getParagraphs()) {
        for (RowOfShapes row : paragraph.getRows()) {
            int xHeight = row.getXHeight();
            xHeights.addValue(xHeight);
        }
    }

    averageRowHeight = xHeights.getPercentile(50);
    LOG.debug("averageRowHeight: " + averageRowHeight);
    return averageRowHeight;
}