List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue
public void addValue(double v)
From source file:com.facebook.presto.tests.AbstractTestQueries.java
@Test public void testTableSamplePoissonizedRescaled() throws Exception { DescriptiveStatistics stats = new DescriptiveStatistics(); long total = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(BIGINT)) .getMaterializedRows().get(0).getField(0); for (int i = 0; i < 100; i++) { String value = (String) computeActual( "SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED APPROXIMATE AT 95 CONFIDENCE") .getMaterializedRows().get(0).getField(0); stats.addValue(Long.parseLong(value.split(" ")[0]) * 1.0 / total); }//from ww w . j a v a2 s. c o m double mean = stats.getGeometricMean(); assertTrue(mean > 0.90 && mean < 1.1, format("Expected sample to be rescaled to ~1.0, but was %s", mean)); assertTrue(stats.getVariance() > 0, "Samples all had the exact same size"); }
From source file:com.facebook.presto.tests.AbstractTestQueries.java
/**
 * Samples {@code orders} with {@code TABLESAMPLE BERNOULLI (50)} one hundred times, checking
 * that no run returns duplicate rows and that the geometric mean of the observed sampling
 * rates lies strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() throws Exception {
    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .size();
    DescriptiveStatistics samplingRates = new DescriptiveStatistics();

    for (int run = 0; run < 100; run++) {
        List<MaterializedRow> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();

        // A set copy drops duplicates, so equal sizes mean every sampled row is distinct.
        int sampledRows = values.size();
        int distinctRows = ImmutableSet.copyOf(values).size();
        assertEquals(sampledRows, distinctRows, "TABLESAMPLE produced duplicate rows");

        samplingRates.addValue(values.size() * 1.0 / total);
    }

    double mean = samplingRates.getGeometricMean();
    boolean withinTolerance = mean > 0.45 && mean < 0.55;
    assertTrue(withinTolerance, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
From source file:knop.psfj.BeadImage.java
/** * load the stack into memory.// w w w. j a va 2s . c om */ public synchronized void workFromMemory() { // if the fileAddress is not equal to null, this means that // the image has not been previously load into memory if (fileAddress != null && stack == null) { System.out.println("Loading image in memory..."); setProgress(0, "Loading image in memory..."); try { // BF.openImagePlus(path) // stack = BF.openImagePlus(fileAddress)[0].getImageStack(); // stack = new // Opener().openImage(fileAddress).getImageStack();// // IJ.openImage(fileAddress).getStack(); ImageProcessorReader ipr = new ImageProcessorReader( new ChannelSeparator(LociPrefs.makeImageReader())); DescriptiveStatistics standardDeviations = new DescriptiveStatistics(); ipr.setId(fileAddress); int width = ipr.getSizeX(); int height = ipr.getSizeY(); int num = ipr.getImageCount(); int numChannel = ipr.getSizeC(); int bitsPerPixel = ipr.getBitsPerPixel(); double stdDev; double min; double max; // if a second channel in the image is detected boolean isSecondChannel = false; BeadImage secondChannel = null; ImageStack secondStack = null; if (numChannel == 2) { secondChannel = new BeadImage(); secondChannel.setFileAddress(fileAddress); secondChannel.setImageName(secondChannel.getImageName() + "_channel_2"); secondStack = new ImageStack(width, height); isSecondChannel = true; secondChannel.setStack(secondStack); notifyObservers(MSG_NEW_CHANNEL_DETECTED, "Two channels detected", null, secondChannel); } stack = new ImageStack(width, height); for (int i = 0; i != num; i++) { setProgress(i, num); setStatus("Loading slice " + (i + 1) + "/" + num + "..."); ImageProcessor ip = ipr.openProcessors(i)[0]; if (isSecondChannel) { i++; ImageProcessor ip2 = ipr.openProcessors(i)[0]; secondStack.addSlice(ip2); secondChannel.setStatus("Loading slice " + (i + 1) + "/" + num + "..."); secondChannel.setProgress(i, num); } min = ip.getMin(); max = ip.getMax(); if (min < minIntentisyOfWholeStack) { minIntentisyOfWholeStack = min; } 
if (max > maxIntensityOfWholeStack) { maxIntensityOfWholeStack = max; } stdDev = ip.getStatistics().stdDev; if (standardDeviations.getMax() < stdDev) { beadFocusPlane = i; } standardDeviations.addValue(stdDev); stack.addSlice(ip); updateView(ip); } if (isSecondChannel) { autoFocus(); secondChannel.autoFocus(); secondChannel.setProgress(100); } try { ipr.close(); } catch (Exception e) { System.err.println("Error when closing the image reader."); } setChanged(); notifyObservers(new Message(this, MSG_IMAGE_OKAY)); setStatus("Done."); setProgress(100); setImageHeight(stack.getHeight()); setImageWidth(stack.getWidth()); isValid = true; new ImagePlus("", stack).resetDisplayRange(); // openImage(); } catch (NullPointerException e) { notifyError("Image not valid."); e.printStackTrace(); setProgress(0, "image not okay"); return; } catch (FormatException e) { setProgress(0, "This image format is not supported."); notifyError("This image format is not supported"); e.printStackTrace(); } catch (IOException e) { notifyError("File not accessible"); setProgress(0, "Image not reachable."); e.printStackTrace(); } } }
From source file:io.prestosql.tests.AbstractTestQueries.java
/**
 * Draws one hundred {@code TABLESAMPLE BERNOULLI (50)} samples of {@code orders}. Each sample
 * must be free of duplicate rows, and the geometric mean of the sample-size-to-table-size
 * ratios must fall strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    DescriptiveStatistics observedRates = new DescriptiveStatistics();
    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .size();

    int attempt = 0;
    while (attempt < 100) {
        List<MaterializedRow> values = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();

        // Copying into a set removes duplicates; the sizes only match if there were none.
        int uniqueCount = ImmutableSet.copyOf(values).size();
        assertEquals(values.size(), uniqueCount, "TABLESAMPLE produced duplicate rows");

        double rate = values.size() * 1.0 / total;
        observedRates.addValue(rate);
        attempt++;
    }

    double mean = observedRates.getGeometricMean();
    String failureMessage = format("Expected mean sampling rate to be ~0.5, but was %s", mean);
    assertTrue(mean > 0.45 && mean < 0.55, failureMessage);
}
From source file:org.alfresco.bm.event.AbstractResultService.java
/**
 * {@inheritDoc}
 * <p>
 * This implementation pages through the stored event records in start-time windows, adds each
 * record's time to per-event-name {@link DescriptiveStatistics}, counts failures per event
 * name, and calls {@code reportAndCycleStats} each time a record starts at or beyond the end
 * of the current aggregation window. It returns without reporting anything when
 * {@code getFirstResult()} yields {@code null}.
 *
 * @param handler       receives the reports; must not be {@code null}
 * @param startTime     lower bound for the first query window (the later of this value and
 *                      the first record's start time is used)
 * @param windowSize    size of the aggregation window; must be positive and a multiple of
 *                      {@code reportPeriod}
 * @param reportPeriod  amount by which the aggregation window is shifted after each report;
 *                      must be positive and no larger than {@code windowSize}
 * @param chartOnly     when {@code true}, records for which {@code isChart()} is
 *                      {@code false} are not added to the statistics
 * @throws IllegalArgumentException if any of the argument constraints above is violated
 */
@Override
public void getResults(ResultHandler handler, long startTime, long windowSize, long reportPeriod,
        boolean chartOnly) {
    /*
     * Keep track of all events' statistics.
     * It is possible to report more frequently than the window size.
     * For each report period in the reporting window, the statistics for the events need to be maintained.
     */

    // Argument validation: fail fast before any query is issued.
    if (handler == null) {
        throw new IllegalArgumentException("A result handler must be supplied.");
    }
    if (windowSize <= 0L) {
        throw new IllegalArgumentException("'windowSize' must be a non-zero, positive number.");
    }
    if (reportPeriod <= 0L) {
        throw new IllegalArgumentException("'reportPeriod' must be a non-zero, positive number.");
    }
    if (reportPeriod > windowSize) {
        throw new IllegalArgumentException("'reportPeriod' cannot more than the 'windowSize'.");
    }
    if (windowSize % reportPeriod != 0L) {
        throw new IllegalArgumentException("'windowSize' must be a multiple of 'reportPeriod'.");
    }

    // We have to keep statistics for each reporting period
    int windowMultiple = (int) (windowSize / reportPeriod);

    // Build stats for reporting back.
    // Each LinkedList is intended to hold 'windowMultiple' entries; this method itself only
    // ever adds the first entry, so the rest are presumably added by reportAndCycleStats.
    // The newest statistics will be the last in the linked list; results will be reported
    // from the first entry each time.
    Map<String, LinkedList<DescriptiveStatistics>> statsByEventName = new HashMap<String, LinkedList<DescriptiveStatistics>>(
            13);
    Map<String, LinkedList<AtomicInteger>> failuresByEventName = new HashMap<String, LinkedList<AtomicInteger>>(
            13);

    // Our event queries use separate windows
    EventRecord firstResult = getFirstResult();
    if (firstResult == null) {
        // There is nothing to report
        return;
    }
    long firstResultStartTime = firstResult.getStartTime();
    EventRecord lastResult = getLastResult();
    long lastResultStartTime = lastResult.getStartTime();

    long queryWindowStartTime = Math.max(firstResultStartTime, startTime); // The start time is inclusive
    // The query window spans the recorded range, clamped to [60000, 3600000]
    // (presumably milliseconds, per the minute/hour comments below).
    long queryWindowSize = lastResult.getStartTime() - firstResult.getStartTime();
    if (queryWindowSize < 60000L) {
        queryWindowSize = 60000L; // Query window is at least a minute
    } else if (queryWindowSize > (60000L * 60L)) {
        queryWindowSize = 60000L * 60L; // Query window is at most an hour
    }
    long queryWindowEndTime = queryWindowStartTime + queryWindowSize;

    // Rebase the aggregation window to encompass the first event.
    // NOTE(review): the division is already an integer (long) division, so Math.floor only
    // sees a whole number here and does not change the value.
    long currentWindowEndTime = (long) Math.floor((firstResultStartTime + reportPeriod) / reportPeriod)
            * reportPeriod;
    long currentWindowStartTime = currentWindowEndTime - windowSize;

    // Iterate over the results, paging through each query window with skip/limit
    int skip = 0;
    int limit = 10000;
    boolean stop = false;
    boolean unreportedResults = false;
    breakStop: while (!stop) {
        List<EventRecord> results = getResults(queryWindowStartTime, queryWindowEndTime, chartOnly, skip,
                limit);
        if (results.size() == 0) {
            if (queryWindowEndTime > lastResultStartTime) {
                // The query window has included the last event, so we have extracted all results
                if (unreportedResults) {
                    // Flush the statistics that have been gathered but not yet reported
                    reportAndCycleStats(statsByEventName, failuresByEventName, currentWindowStartTime,
                            currentWindowEndTime, windowMultiple, handler);
                    unreportedResults = false;
                }
                stop = true;
            } else {
                // Move the query window up
                queryWindowStartTime = queryWindowEndTime;
                queryWindowEndTime += queryWindowSize;
                // Reset the skip count as we are in a new query window
                skip = 0;
            }
            // Re-evaluate the loop condition: either we stop or we query the next window
            continue;
        }
        // Process each result found in the query window
        for (EventRecord eventRecord : results) {
            String eventRecordName = eventRecord.getEvent().getName();
            long eventRecordStartTime = eventRecord.getStartTime();
            long eventRecordTime = eventRecord.getTime();
            boolean eventRecordSuccess = eventRecord.isSuccess();

            // If the current event is past the reporting period, then report
            if (eventRecordStartTime >= currentWindowEndTime) {
                // Report the current stats; the returned flag decides whether to stop
                stop = reportAndCycleStats(statsByEventName, failuresByEventName, currentWindowStartTime,
                        currentWindowEndTime, windowMultiple, handler);
                unreportedResults = false;
                // Shift the window up by one report period
                currentWindowStartTime += reportPeriod;
                currentWindowEndTime += reportPeriod;
                // Check for stop: leave the outer while loop, not just this for loop
                if (stop) {
                    break breakStop;
                }
            }
            // Increase the skip with each window result, so the next page starts after it
            skip++;

            // Ignore results we don't wish to chart
            if (chartOnly && !eventRecord.isChart()) {
                continue;
            }
            // We have to report this result at some point
            unreportedResults = true;

            // Get the linked list of stats for the event
            LinkedList<DescriptiveStatistics> eventStatsLL = statsByEventName.get(eventRecordName);
            if (eventStatsLL == null) {
                // Create a LL for the event
                eventStatsLL = new LinkedList<DescriptiveStatistics>();
                statsByEventName.put(eventRecordName, eventStatsLL);
                // We need at least one entry in order to record stats
                eventStatsLL.add(new DescriptiveStatistics());
            }
            // Write the current event to all the stats for the event
            for (DescriptiveStatistics eventStats : eventStatsLL) {
                eventStats.addValue(eventRecordTime);
            }

            // Get the linked list of failure counts for the event
            LinkedList<AtomicInteger> eventFailuresLL = failuresByEventName.get(eventRecordName);
            if (eventFailuresLL == null) {
                // Create a LL for the event
                eventFailuresLL = new LinkedList<AtomicInteger>();
                failuresByEventName.put(eventRecordName, eventFailuresLL);
                // Need one entry to record failures
                eventFailuresLL.add(new AtomicInteger(0));
            }
            // Write any failures to all counts for the event
            if (!eventRecordSuccess) {
                for (AtomicInteger eventFailures : eventFailuresLL) {
                    eventFailures.incrementAndGet();
                }
            }
        }
    }
}
From source file:org.alfresco.bm.event.mongo.MongoResultServiceTest.java
/** * Test the case where the reporting period is smaller than the stats window */// w w w.j a v a 2s. com @Test public void getCheckedResultsUsingHandler() { pumpRecords(10); final AtomicInteger count = new AtomicInteger(); final Map<String, DescriptiveStatistics> lastStatsByEventName = new HashMap<String, DescriptiveStatistics>( 17); resultService.getResults(new ResultHandler() { @Override public boolean processResult(long fromTime, long toTime, Map<String, DescriptiveStatistics> statsByEventName, Map<String, Integer> failuresByEventName) throws Throwable { // Always keep the last stats lastStatsByEventName.clear(); lastStatsByEventName.putAll(statsByEventName); count.incrementAndGet(); return true; } }, 0L, 200L, 10L, false); // Check assertEquals(10, count.get()); // Now go through the last stats received // Check it against the last window size List<String> names = resultService.getEventNames(); for (String eventName : names) { List<EventRecord> eventResults = resultService.getResults(eventName, 0, 1000); DescriptiveStatistics eventStats = new DescriptiveStatistics(); for (EventRecord eventRecord : eventResults) { eventStats.addValue(eventRecord.getTime()); } DescriptiveStatistics lastEventStats = lastStatsByEventName.get(eventName); assertNotNull("No last report for event '" + eventName + "'.", lastEventStats); // Now check that this matched the last report exactly assertEquals("Mean for '" + eventName + "' was not correct. ", (long) Math.floor(eventStats.getMean()), (long) Math.floor(lastStatsByEventName.get(eventName).getMean())); } }
From source file:org.apache.gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions, long lowWatermark, long expectedHighWatermark) { int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions); int totalGroups = histogram.getGroups().size(); log.info("Histogram total record count: " + histogram.totalRecordCount); log.info("Histogram total groups: " + totalGroups); log.info("maxPartitions: " + maxPartitions); log.info("interval: " + interval); List<HistogramGroup> groups = histogram.getGroups(); List<String> partitionPoints = new ArrayList<>(); DescriptiveStatistics statistics = new DescriptiveStatistics(); int count = 0; HistogramGroup group;/*w w w . j av a 2s . com*/ Iterator<HistogramGroup> it = groups.iterator(); while (it.hasNext()) { group = it.next(); if (count == 0) { // Add a new partition point; partitionPoints.add( Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT)); } /** * Using greedy algorithm by keep adding group until it exceeds the interval size (x2) * Proof: Assuming nth group violates 2 x interval size, then all groups from 0th to (n-1)th, plus nth group, * will have total size larger or equal to interval x 2. Hence, we are saturating all intervals (with original size) * without leaving any unused space in between. We could choose x3,x4... but it is not space efficient. 
*/ if (count != 0 && count + group.count >= 2 * interval) { // Summarize current group statistics.addValue(count); // A step-in start partitionPoints.add( Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT)); count = group.count; } else { // Add group into current partition count += group.count; } if (count >= interval) { // Summarize current group statistics.addValue(count); // A fresh start next time count = 0; } } if (partitionPoints.isEmpty()) { throw new RuntimeException("Unexpected empty partition list"); } if (count > 0) { // Summarize last group statistics.addValue(count); } // Add global high watermark as last point partitionPoints.add(Long.toString(expectedHighWatermark)); log.info("Dynamic partitioning statistics: "); log.info("data: " + Arrays.toString(statistics.getValues())); log.info(statistics.toString()); String specifiedPartitions = Joiner.on(",").join(partitionPoints); log.info("Calculated specified partitions: " + specifiedPartitions); return specifiedPartitions; }
From source file:org.apache.groovy.perf.CompilerPerformanceTest.java
public static void main(String[] args) throws Exception { List<File> sources = new ArrayList<>(); List<URL> classpath = new ArrayList<>(); boolean isCp = false; for (String arg : args) { if ("-cp".equals(arg)) { isCp = true;// w ww . jav a 2s. com } else if (isCp) { classpath.add(new File(arg).toURI().toURL()); } else { sources.add(new File(arg)); } } ScriptCompilationExecuter executer = new ScriptCompilationExecuter( sources.toArray(new File[sources.size()]), classpath); System.out.println("Using Groovy " + GROOVY_VERSION); DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < WARMUP + REPEAT; i++) { if (i < WARMUP) { System.out.println("Warmup #" + (i + 1)); } else { System.out.println("Round #" + (i - WARMUP)); } long dur = executer.execute(); System.gc(); System.out.printf("Compile time = %dms%n", dur); if (i >= WARMUP) { stats.addValue((double) dur); } } System.out.println("Compilation took " + stats.getMean() + "ms " + stats.getStandardDeviation() + "ms"); FileWriter wrt = new FileWriter(new File("target/compilation-stats.csv"), true); wrt.append(String.format("%s;%s;%s\n", GROOVY_VERSION, stats.getMean(), stats.getStandardDeviation())); wrt.close(); }
From source file:org.apache.hadoop.hive.metastore.tools.MicroBenchmark.java
/** * Run the benchmark and measure run-time statistics in nanoseconds.<p> * Before the run the warm-up phase is executed. * @param pre Optional pre-test setup//from ww w . j av a2s .co m * @param test Mandatory test * @param post Optional post-test cleanup * @return Statistics describing the results. All times are in nanoseconds. */ public DescriptiveStatistics measure(@Nullable Runnable pre, @NotNull Runnable test, @Nullable Runnable post) { // Warmup phase for (int i = 0; i < warmup; i++) { if (pre != null) { pre.run(); } test.run(); if (post != null) { post.run(); } } // Run the benchmark DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < iterations; i++) { if (pre != null) { pre.run(); } long start = System.nanoTime(); test.run(); long end = System.nanoTime(); stats.addValue((double) (end - start) / scaleFactor); if (post != null) { post.run(); } } return stats; }
From source file:org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.java
private static DescriptiveStatistics runTest(Benchmark benchmark) throws Exception { final DescriptiveStatistics statistics = new DescriptiveStatistics(); long runtimeEnd = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(60); while (System.currentTimeMillis() < runtimeEnd) { statistics.addValue(execute(benchmark)); }//from w w w . j ava2 s .c o m return statistics; }