Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics addValue

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.addValue.

Prototype

public void addValue(double v)

Source Link

Document

Adds the value to the dataset.

Usage

From source file:com.facebook.presto.tests.AbstractTestQueries.java

/**
 * Runs a rescaled 50% POISSONIZED sample 100 times and checks that the geometric mean of
 * (estimated count / exact count) lies strictly between 0.90 and 1.1, and that the
 * individual ratios are not all identical (variance greater than zero).
 */
@Test
public void testTableSamplePoissonizedRescaled() throws Exception {
    // Baseline: the exact row count of the unsampled table.
    long exactRowCount = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .get(0)
            .getField(0);

    // One ratio per sampling run.
    DescriptiveStatistics ratios = new DescriptiveStatistics();
    int remainingRuns = 100;
    while (remainingRuns-- > 0) {
        String approximate = (String) computeActual(
                "SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED APPROXIMATE AT 95 CONFIDENCE")
                        .getMaterializedRows().get(0).getField(0);
        // Only the text before the first space is parsed as the estimated count.
        String estimate = approximate.split(" ")[0];
        ratios.addValue(Long.parseLong(estimate) * 1.0 / exactRowCount);
    }

    double mean = ratios.getGeometricMean();
    boolean closeToOne = mean > 0.90 && mean < 1.1;
    assertTrue(closeToOne, format("Expected sample to be rescaled to ~1.0, but was %s", mean));
    assertTrue(ratios.getVariance() > 0, "Samples all had the exact same size");
}

From source file:com.facebook.presto.tests.AbstractTestQueries.java

/**
 * Runs a 50% BERNOULLI sample 100 times, checking that no sample contains duplicate rows
 * and that the geometric mean of the observed sampling fractions lies strictly between
 * 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() throws Exception {
    int fullRowCount = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .size();

    DescriptiveStatistics samplingFractions = new DescriptiveStatistics();
    for (int run = 1; run <= 100; run++) {
        List<MaterializedRow> sampledRows = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();

        // A set collapses duplicates, so equal sizes mean every sampled row is distinct.
        int sampledCount = sampledRows.size();
        int distinctCount = ImmutableSet.copyOf(sampledRows).size();
        assertEquals(sampledCount, distinctCount, "TABLESAMPLE produced duplicate rows");

        samplingFractions.addValue(sampledCount * 1.0 / fullRowCount);
    }

    double mean = samplingFractions.getGeometricMean();
    boolean nearHalf = mean > 0.45 && mean < 0.55;
    assertTrue(nearHalf, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}

From source file:knop.psfj.BeadImage.java

/**
 * Loads the image stack into memory.
 * <p>
 * Does nothing unless {@code fileAddress} is set and {@code stack} is still {@code null}.
 * Otherwise every plane of the file is read into a new {@link ImageStack}; while reading,
 * the minimum and maximum intensity of the whole stack are tracked and
 * {@code beadFocusPlane} is updated whenever a slice has a larger standard deviation than
 * all slices seen before it. When the reader reports exactly two channels, planes are read
 * in pairs: the first plane of each pair goes into this image's stack, the second into a
 * newly created {@code BeadImage} that is announced to observers.
 * <p>
 * The image reader is closed in a {@code finally} block, so it is released even when
 * loading fails part-way through. Failures are reported through {@code notifyError} and
 * the progress status instead of being rethrown.
 */
public synchronized void workFromMemory() {
    // A non-null fileAddress combined with a null stack means that the
    // image has not been loaded into memory yet.
    if (fileAddress != null && stack == null) {
        System.out.println("Loading image in memory...");
        setProgress(0, "Loading image in memory...");

        try {
            ImageProcessorReader ipr = new ImageProcessorReader(
                    new ChannelSeparator(LociPrefs.makeImageReader()));
            try {
                DescriptiveStatistics standardDeviations = new DescriptiveStatistics();

                ipr.setId(fileAddress);

                int width = ipr.getSizeX();
                int height = ipr.getSizeY();
                int num = ipr.getImageCount();
                int numChannel = ipr.getSizeC();
                int bitsPerPixel = ipr.getBitsPerPixel();

                // State used only when a second channel is detected in the image.
                boolean isSecondChannel = false;
                BeadImage secondChannel = null;
                ImageStack secondStack = null;

                if (numChannel == 2) {

                    secondChannel = new BeadImage();
                    secondChannel.setFileAddress(fileAddress);
                    secondChannel.setImageName(secondChannel.getImageName() + "_channel_2");
                    secondStack = new ImageStack(width, height);
                    isSecondChannel = true;
                    secondChannel.setStack(secondStack);

                    notifyObservers(MSG_NEW_CHANNEL_DETECTED, "Two channels detected", null, secondChannel);

                }

                stack = new ImageStack(width, height);

                for (int i = 0; i != num; i++) {
                    setProgress(i, num);
                    setStatus("Loading slice " + (i + 1) + "/" + num + "...");

                    ImageProcessor ip = ipr.openProcessors(i)[0];

                    if (isSecondChannel) {
                        // The next plane is read as the second channel's slice, so the
                        // loop index advances by two per iteration in this mode.
                        i++;
                        ImageProcessor ip2 = ipr.openProcessors(i)[0];
                        secondStack.addSlice(ip2);
                        secondChannel.setStatus("Loading slice " + (i + 1) + "/" + num + "...");
                        secondChannel.setProgress(i, num);
                    }

                    double min = ip.getMin();
                    double max = ip.getMax();

                    if (min < minIntentisyOfWholeStack) {
                        minIntentisyOfWholeStack = min;
                    }
                    if (max > maxIntensityOfWholeStack) {
                        maxIntensityOfWholeStack = max;
                    }

                    double stdDev = ip.getStatistics().stdDev;

                    // NOTE(review): for the first slice the statistics are still empty;
                    // confirm what getMax() returns then (Commons Math documents NaN),
                    // because that would make this comparison false for slice 0.
                    if (standardDeviations.getMax() < stdDev) {
                        beadFocusPlane = i;
                    }
                    standardDeviations.addValue(stdDev);
                    stack.addSlice(ip);
                    updateView(ip);
                }
                if (isSecondChannel) {
                    autoFocus();
                    secondChannel.autoFocus();
                    secondChannel.setProgress(100);
                }
            } finally {
                // Release the reader on success AND on failure; a failing close is
                // reported but must not mask the outcome of the load itself.
                try {
                    ipr.close();
                } catch (Exception e) {
                    System.err.println("Error when closing the image reader.");
                }
            }
            setChanged();
            notifyObservers(new Message(this, MSG_IMAGE_OKAY));
            setStatus("Done.");
            setProgress(100);

            setImageHeight(stack.getHeight());
            setImageWidth(stack.getWidth());

            isValid = true;
            new ImagePlus("", stack).resetDisplayRange();

        } catch (NullPointerException e) {
            notifyError("Image not valid.");
            e.printStackTrace();
            setProgress(0, "image not okay");
            return;
        } catch (FormatException e) {
            setProgress(0, "This image format is not supported.");
            notifyError("This image format is not supported");
            e.printStackTrace();
        } catch (IOException e) {

            notifyError("File not accessible");
            setProgress(0, "Image not reachable.");
            e.printStackTrace();
        }

    }
}

From source file:io.prestosql.tests.AbstractTestQueries.java

/**
 * Samples the orders table with BERNOULLI (50) one hundred times. Each sample must be free
 * of duplicate rows, and the geometric mean of (sample size / full size) must lie strictly
 * between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    final int expectedTotal = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .size();
    final DescriptiveStatistics observedRates = new DescriptiveStatistics();

    int iteration = 0;
    while (iteration < 100) {
        List<MaterializedRow> sample = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();

        // Copying into a set drops duplicates; a size mismatch would reveal them.
        int uniqueRows = ImmutableSet.copyOf(sample).size();
        assertEquals(sample.size(), uniqueRows, "TABLESAMPLE produced duplicate rows");

        double rate = sample.size() * 1.0 / expectedTotal;
        observedRates.addValue(rate);
        iteration++;
    }

    double mean = observedRates.getGeometricMean();
    assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}

From source file:org.alfresco.bm.event.AbstractResultService.java

/**
 * {@inheritDoc}
 * <p>
 * Implementation notes: the arguments are validated first ({@code handler} must be
 * non-null, {@code windowSize} and {@code reportPeriod} must be positive,
 * {@code reportPeriod} must not exceed {@code windowSize} and must divide it evenly).
 * If there is no first result the method returns without calling the handler.
 * Event records are then fetched in pages of up to 10000 from a sliding query window
 * (between one minute and one hour long), and each record's time and failure state are
 * accumulated per event name. Reporting itself is delegated to
 * {@code reportAndCycleStats}; iteration stops as soon as that call returns {@code true}
 * or all results have been read.
 *
 * @throws IllegalArgumentException if any of the argument checks described above fails
 */
@Override
public void getResults(ResultHandler handler, long startTime, long windowSize, long reportPeriod,
        boolean chartOnly) {
    /*
     * Keep track of all events' statistics.
     * It is possible to report more frequently than the window size.
     * For each report period in the reporting window, the statistics for the events need to be maintained.
     */

    if (handler == null) {
        throw new IllegalArgumentException("A result handler must be supplied.");
    }
    if (windowSize <= 0L) {
        throw new IllegalArgumentException("'windowSize' must be a non-zero, positive number.");
    }
    if (reportPeriod <= 0L) {
        throw new IllegalArgumentException("'reportPeriod' must be a non-zero, positive number.");
    }
    if (reportPeriod > windowSize) {
        throw new IllegalArgumentException("'reportPeriod' cannot more than the 'windowSize'.");
    }
    if (windowSize % reportPeriod != 0L) {
        throw new IllegalArgumentException("'windowSize' must be a multiple of 'reportPeriod'.");
    }

    // We have to keep statistics for each reporting period
    int windowMultiple = (int) (windowSize / reportPeriod);

    // Build stats for reporting back
    // Each LinkedList will have 'windowMultiple' entries.
    // The newest statistics will be the last in the linked list; results will be reported from the first entry each time.
    // NOTE(review): this method only ever adds the first entry of each list; the remaining
    // entries are presumably added and rotated by reportAndCycleStats - confirm there.
    Map<String, LinkedList<DescriptiveStatistics>> statsByEventName = new HashMap<String, LinkedList<DescriptiveStatistics>>(
            13);
    Map<String, LinkedList<AtomicInteger>> failuresByEventName = new HashMap<String, LinkedList<AtomicInteger>>(
            13);

    // Our event queries use separate windows
    EventRecord firstResult = getFirstResult();
    if (firstResult == null) {
        // There is nothing to report
        return;
    }
    long firstResultStartTime = firstResult.getStartTime();
    EventRecord lastResult = getLastResult();
    long lastResultStartTime = lastResult.getStartTime();

    long queryWindowStartTime = Math.max(firstResultStartTime, startTime); // The start time is inclusive
    // Start from the full span of recorded results, then clamp it to [1 minute, 1 hour]
    long queryWindowSize = lastResult.getStartTime() - firstResult.getStartTime();
    if (queryWindowSize < 60000L) {
        queryWindowSize = 60000L; // Query window is at least a minute
    } else if (queryWindowSize > (60000L * 60L)) {
        queryWindowSize = 60000L * 60L; // Query window is at most an hour
    }
    long queryWindowEndTime = queryWindowStartTime + queryWindowSize;

    // Rebase the aggregation window to encompass the first event: the window end is
    // (firstResultStartTime + reportPeriod) divided by reportPeriod using long division,
    // multiplied back by reportPeriod.
    long currentWindowEndTime = (long) Math.floor((firstResultStartTime + reportPeriod) / reportPeriod)
            * reportPeriod;
    long currentWindowStartTime = currentWindowEndTime - windowSize;

    // Iterate over the results
    int skip = 0; // Number of results already consumed from the current query window
    int limit = 10000; // Page size passed to each query
    boolean stop = false;
    boolean unreportedResults = false; // True while recorded results have not been reported yet
    breakStop: while (!stop) {
        List<EventRecord> results = getResults(queryWindowStartTime, queryWindowEndTime, chartOnly, skip,
                limit);
        if (results.size() == 0) {
            if (queryWindowEndTime > lastResultStartTime) {
                // The query window has included the last event, so we have extracted all results
                if (unreportedResults) {
                    // Flush the results that were recorded but not yet reported
                    reportAndCycleStats(statsByEventName, failuresByEventName, currentWindowStartTime,
                            currentWindowEndTime, windowMultiple, handler);
                    unreportedResults = false;
                }
                stop = true;
            } else {
                // Move the query window up
                queryWindowStartTime = queryWindowEndTime;
                queryWindowEndTime += queryWindowSize;
                // Reset the skip count as we are in a new query window
                skip = 0;
            }
            // Re-evaluate the loop condition: either we stop or we query the next window
            continue;
        }
        // Process each result found in the query window
        for (EventRecord eventRecord : results) {
            String eventRecordName = eventRecord.getEvent().getName();
            long eventRecordStartTime = eventRecord.getStartTime();
            long eventRecordTime = eventRecord.getTime();
            boolean eventRecordSuccess = eventRecord.isSuccess();

            // If the current event is past the reporting period, then report
            if (eventRecordStartTime >= currentWindowEndTime) {
                // Report the current stats; the returned flag tells us whether to stop
                stop = reportAndCycleStats(statsByEventName, failuresByEventName, currentWindowStartTime,
                        currentWindowEndTime, windowMultiple, handler);
                unreportedResults = false;
                // Shift the window up by one report period
                currentWindowStartTime += reportPeriod;
                currentWindowEndTime += reportPeriod;
                // Check for stop: leave both the for loop and the enclosing while loop
                if (stop) {
                    break breakStop;
                }
            }
            // Increase the skip with each window result
            skip++;

            // Ignore results we don't wish to chart
            if (chartOnly && !eventRecord.isChart()) {
                continue;
            }

            // We have to report this result at some point
            unreportedResults = true;

            // Get the linked list of stats for the event
            LinkedList<DescriptiveStatistics> eventStatsLL = statsByEventName.get(eventRecordName);
            if (eventStatsLL == null) {
                // Create a LL for the event
                eventStatsLL = new LinkedList<DescriptiveStatistics>();
                statsByEventName.put(eventRecordName, eventStatsLL);
                // We need at least one entry in order to record stats
                eventStatsLL.add(new DescriptiveStatistics());
            }
            // Write the current event to all the stats for the event
            for (DescriptiveStatistics eventStats : eventStatsLL) {
                eventStats.addValue(eventRecordTime);
            }

            // Get the linked list of failure counts for the event
            LinkedList<AtomicInteger> eventFailuresLL = failuresByEventName.get(eventRecordName);
            if (eventFailuresLL == null) {
                // Create a LL for the event
                eventFailuresLL = new LinkedList<AtomicInteger>();
                failuresByEventName.put(eventRecordName, eventFailuresLL);
                // Need one entry to record failures
                eventFailuresLL.add(new AtomicInteger(0));
            }
            // Write any failures to all counts for the event
            if (!eventRecordSuccess) {
                for (AtomicInteger eventFailures : eventFailuresLL) {
                    eventFailures.incrementAndGet();
                }
            }
        }
    }
}

From source file:org.alfresco.bm.event.mongo.MongoResultServiceTest.java

/**
 * Tests the case where the reporting period (10) is smaller than the stats window (200):
 * the handler must be invoked 10 times, and for every event name the floored mean of the
 * last reported statistics must equal the floored mean computed directly from that
 * event's results.
 */
@Test
public void getCheckedResultsUsingHandler() {
    pumpRecords(10);

    final Map<String, DescriptiveStatistics> latestStats = new HashMap<String, DescriptiveStatistics>(17);
    final AtomicInteger reportCount = new AtomicInteger();

    ResultHandler capturingHandler = new ResultHandler() {
        @Override
        public boolean processResult(long fromTime, long toTime,
                Map<String, DescriptiveStatistics> statsByEventName, Map<String, Integer> failuresByEventName)
                throws Throwable {
            // Only the most recent report is kept; earlier ones are overwritten.
            latestStats.clear();
            latestStats.putAll(statsByEventName);

            reportCount.incrementAndGet();
            return true;
        }
    };
    resultService.getResults(capturingHandler, 0L, 200L, 10L, false);

    // One callback is expected per reported period.
    assertEquals(10, reportCount.get());

    // Rebuild the statistics for each event and compare them with the last report.
    for (String eventName : resultService.getEventNames()) {
        DescriptiveStatistics expectedStats = new DescriptiveStatistics();
        for (EventRecord eventRecord : resultService.getResults(eventName, 0, 1000)) {
            expectedStats.addValue(eventRecord.getTime());
        }

        DescriptiveStatistics reportedStats = latestStats.get(eventName);
        assertNotNull("No last report for event '" + eventName + "'.", reportedStats);

        // Means are compared after flooring to whole numbers.
        long expectedMean = (long) Math.floor(expectedStats.getMean());
        long reportedMean = (long) Math.floor(reportedStats.getMean());
        assertEquals("Mean for '" + eventName + "' was not correct. ", expectedMean, reportedMean);
    }
}

From source file:org.apache.gobblin.salesforce.SalesforceSource.java

/**
 * Builds the comma-separated list of partition points for the given histogram.
 * <p>
 * Groups are visited in order and accumulated greedily into partitions. A partition point
 * (the group's key, reformatted from {@code SECONDS_FORMAT} to
 * {@code Partitioner.WATERMARKTIMEFORMAT}) is emitted whenever a new partition starts, and
 * {@code expectedHighWatermark} is always appended as the final point. The size of every
 * partition is collected and logged for diagnostics.
 *
 * @param histogram source of the groups and of the total record count that is logged
 * @param minTargetPartitionSize passed through to {@code computeTargetPartitionSize}
 * @param maxPartitions passed through to {@code computeTargetPartitionSize} and logged
 * @param lowWatermark not used by this method
 * @param expectedHighWatermark appended as the last partition point
 * @return the partition points joined with ","
 * @throws RuntimeException if no partition point was produced from the histogram groups
 */
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
        long lowWatermark, long expectedHighWatermark) {
    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    DescriptiveStatistics statistics = new DescriptiveStatistics();
    List<String> partitionPoints = new ArrayList<>();

    // Number of records accumulated in the partition that is currently being filled.
    int pending = 0;

    for (HistogramGroup group : histogram.getGroups()) {
        if (pending == 0) {
            // An empty accumulator means this group opens a new partition.
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        /*
         * Greedy strategy: keep adding groups until the partition would reach twice the interval size.
         * Rationale: if the nth group is the one that violates 2 x interval, then groups 0..(n-1) plus
         * the nth group have a total size of at least 2 x interval, so every interval (at its original
         * size) is saturated without leaving unused space in between. A larger factor (x3, x4, ...)
         * would also work but is not space efficient.
         */
        if (pending == 0 || pending + group.count < 2 * interval) {
            // The group still fits: add it to the current partition.
            pending += group.count;
        } else {
            // Close the current partition and let this group start the next one.
            statistics.addValue(pending);
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            pending = group.count;
        }

        if (pending >= interval) {
            // The partition is full: record its size and start afresh with the next group.
            statistics.addValue(pending);
            pending = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }

    // Record whatever is left in the last, partially filled partition.
    if (pending > 0) {
        statistics.addValue(pending);
    }

    // The global high watermark always closes the list.
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());

    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}

From source file:org.apache.groovy.perf.CompilerPerformanceTest.java

/**
 * Compiles the given sources repeatedly and reports compilation-time statistics.
 * <p>
 * Arguments before {@code -cp} are treated as source files; every argument after
 * {@code -cp} is treated as a classpath entry. The first {@code WARMUP} rounds are run but
 * not recorded; the following {@code REPEAT} rounds feed the statistics. The mean and
 * standard deviation are printed and appended as one line to
 * {@code target/compilation-stats.csv}.
 *
 * @param args source files, optionally followed by {@code -cp} and classpath entries
 * @throws Exception if a classpath entry cannot be converted to a URL, compilation fails,
 *         or the statistics file cannot be written
 */
public static void main(String[] args) throws Exception {
    List<File> sources = new ArrayList<>();
    List<URL> classpath = new ArrayList<>();
    boolean isCp = false;
    for (String arg : args) {
        if ("-cp".equals(arg)) {
            // From here on, all remaining arguments are classpath entries.
            isCp = true;
        } else if (isCp) {
            classpath.add(new File(arg).toURI().toURL());
        } else {
            sources.add(new File(arg));
        }
    }
    ScriptCompilationExecuter executer = new ScriptCompilationExecuter(
            sources.toArray(new File[sources.size()]), classpath);
    System.out.println("Using Groovy " + GROOVY_VERSION);

    DescriptiveStatistics stats = new DescriptiveStatistics();

    for (int i = 0; i < WARMUP + REPEAT; i++) {
        if (i < WARMUP) {
            System.out.println("Warmup #" + (i + 1));
        } else {
            System.out.println("Round #" + (i - WARMUP));
        }
        long dur = executer.execute();
        System.gc();
        System.out.printf("Compile time = %dms%n", dur);
        // Warm-up rounds are timed and printed but excluded from the statistics.
        if (i >= WARMUP) {
            stats.addValue((double) dur);
        }
    }

    double mean = stats.getMean();
    double standardDeviation = stats.getStandardDeviation();
    System.out.println("Compilation took " + mean + "ms  " + standardDeviation + "ms");

    // try-with-resources closes the writer even if append() throws, so the file
    // handle is never leaked. The file is opened in append mode.
    try (FileWriter wrt = new FileWriter(new File("target/compilation-stats.csv"), true)) {
        wrt.append(String.format("%s;%s;%s\n", GROOVY_VERSION, mean, standardDeviation));
    }
}

From source file:org.apache.hadoop.hive.metastore.tools.MicroBenchmark.java

/**
 * Runs the benchmark and collects run-time statistics.<p>
 * The test is first executed {@code warmup} times without being measured, then
 * {@code iterations} times with measurement. Only the {@code test} call itself is timed
 * (using {@link System#nanoTime()}); the optional {@code pre} and {@code post} steps run
 * outside the timed section. Each elapsed time is divided by {@code scaleFactor} before it
 * is recorded.
 *
 * @param pre Optional pre-test setup
 * @param test Mandatory test
 * @param post Optional post-test cleanup
 * @return Statistics describing the results: one value per measured iteration, in
 *         nanoseconds divided by {@code scaleFactor}.
 */
public DescriptiveStatistics measure(@Nullable Runnable pre, @NotNull Runnable test, @Nullable Runnable post) {
    // Warm-up phase: same sequence as a measured round, but nothing is recorded.
    int warmupRound = 0;
    while (warmupRound < warmup) {
        if (pre != null) {
            pre.run();
        }
        test.run();
        if (post != null) {
            post.run();
        }
        warmupRound++;
    }

    // Measured phase.
    DescriptiveStatistics timings = new DescriptiveStatistics();
    int round = 0;
    while (round < iterations) {
        if (pre != null) {
            pre.run();
        }

        long startedAt = System.nanoTime();
        test.run();
        long finishedAt = System.nanoTime();

        double elapsed = (double) (finishedAt - startedAt);
        timings.addValue(elapsed / scaleFactor);

        if (post != null) {
            post.run();
        }
        round++;
    }
    return timings;
}

From source file:org.apache.jackrabbit.oak.commons.benchmark.MicroBenchmark.java

/**
 * Executes the benchmark repeatedly until 60 seconds of wall-clock time have passed and
 * records the value returned by each {@code execute} call.
 *
 * @param benchmark the benchmark handed to {@code execute} on every round
 * @return the statistics holding one value per completed round
 * @throws Exception if {@code execute} fails
 */
private static DescriptiveStatistics runTest(Benchmark benchmark) throws Exception {
    final DescriptiveStatistics samples = new DescriptiveStatistics();
    final long deadline = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(60);
    // The clock is re-read before every round; a round that has started always completes.
    for (long now = System.currentTimeMillis(); now < deadline; now = System.currentTimeMillis()) {
        samples.addValue(execute(benchmark));
    }
    return samples;
}