Example usage for org.apache.commons.math3.stat.descriptive SummaryStatistics getStandardDeviation

List of usage examples for org.apache.commons.math3.stat.descriptive SummaryStatistics getStandardDeviation

Introduction

On this page you can find example usage for org.apache.commons.math3.stat.descriptive SummaryStatistics getStandardDeviation.

Prototype

public double getStandardDeviation() 

Document

Returns the standard deviation of the values that have been added.
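
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) showing the basic call. Note that getStandardDeviation() returns the bias-corrected sample standard deviation; it returns Double.NaN if no values have been added and 0 for a single value.

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class StdDevDemo {
    public static void main(String[] args) {
        SummaryStatistics stats = new SummaryStatistics();
        for (double v : new double[] { 2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0 }) {
            stats.addValue(v);
        }
        // Bias-corrected sample standard deviation (denominator n - 1)
        System.out.printf("stddev = %.4f%n", stats.getStandardDeviation());
    }
}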

Usage

From source file:org.apache.cassandra.dht.tokenallocator.ReplicationAwareTokenAllocatorTest.java

private static String mms(SummaryStatistics s) {
    return String.format("max %.2f min %.2f stddev %.4f", s.getMax(), s.getMin(), s.getStandardDeviation());
}

From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java

public static Collection<Token> allocateTokens(final TokenMetadata tokenMetadata,
        final AbstractReplicationStrategy rs, final InetAddress endpoint, int numTokens) {
    StrategyAdapter strategy = getStrategy(tokenMetadata, rs, endpoint);
    Collection<Token> tokens = create(tokenMetadata, strategy).addUnit(endpoint, numTokens);
    tokens = adjustForCrossDatacenterClashes(tokenMetadata, strategy, tokens);

    if (logger.isWarnEnabled()) {
        logger.warn("Selected tokens {}", tokens);
        SummaryStatistics os = replicatedOwnershipStats(tokenMetadata, rs, endpoint);
        TokenMetadata tokenMetadataCopy = tokenMetadata.cloneOnlyTokenMap();
        tokenMetadataCopy.updateNormalTokens(tokens, endpoint);
        SummaryStatistics ns = replicatedOwnershipStats(tokenMetadataCopy, rs, endpoint);
        logger.warn("Replicated node load in datacentre before allocation {}", statToString(os));
        logger.warn("Replicated node load in datacentre after allocation {}", statToString(ns));

        // TODO: Is it worth doing the replicated ownership calculation always to be able to raise this alarm?
        if (ns.getStandardDeviation() > os.getStandardDeviation())
            logger.warn("Unexpected growth in standard deviation after allocation.");
    }
    return tokens;
}

From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java

public static String statToString(SummaryStatistics stat) {
    return String.format("max %.2f min %.2f stddev %.4f", stat.getMax() / stat.getMean(),
            stat.getMin() / stat.getMean(), stat.getStandardDeviation());
}

From source file:org.apache.solr.client.solrj.io.eval.HistogramEvaluator.java

@Override
public Object doWork(Object... values) throws IOException {
    if (Arrays.stream(values).anyMatch(item -> null == item)) {
        return null;
    }

    List<?> sourceValues;
    Integer bins = 10;

    if (values.length >= 1) {
        sourceValues = values[0] instanceof List<?> ? (List<?>) values[0] : Arrays.asList(values[0]);

        if (values.length >= 2) {
            if (values[1] instanceof Number) {
                bins = ((Number) values[1]).intValue();
            } else {
                throw new IOException(String.format(Locale.ROOT,
                        "Invalid expression %s - if second parameter is provided then it must be a valid number but found %s instead",
                        toExpression(constructingFactory), values[1].getClass().getSimpleName()));
            }
        }
    } else {
        throw new IOException(
                String.format(Locale.ROOT, "Invalid expression %s - expecting at least one value but found %d",
                        toExpression(constructingFactory), containedEvaluators.size()));
    }

    EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
    distribution.load(
            ((List<?>) sourceValues).stream().mapToDouble(value -> ((Number) value).doubleValue()).toArray());

    List<Tuple> histogramBins = new ArrayList<>();
    for (SummaryStatistics binSummary : distribution.getBinStats()) {
        Map<String, Number> map = new HashMap<>();
        map.put("max", binSummary.getMax());
        map.put("mean", binSummary.getMean());
        map.put("min", binSummary.getMin());
        map.put("stdev", binSummary.getStandardDeviation());
        map.put("sum", binSummary.getSum());
        map.put("N", binSummary.getN());
        map.put("var", binSummary.getVariance());
        map.put("cumProb", distribution.cumulativeProbability(binSummary.getMean()));
        map.put("prob", distribution.probability(binSummary.getMin(), binSummary.getMax()));
        histogramBins.add(new Tuple(map));
    }

    return histogramBins;
}
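
The evaluator above leans on EmpiricalDistribution.getBinStats(), which returns one SummaryStatistics per histogram bin. Here is a stripped-down sketch of just that interaction (assuming only commons-math3 on the classpath; the data values are illustrative):

import java.util.List;
import org.apache.commons.math3.random.EmpiricalDistribution;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class BinStatsDemo {
    public static void main(String[] args) {
        double[] data = { 1.0, 2.0, 2.5, 3.0, 7.0, 8.0, 8.5, 9.0 };
        EmpiricalDistribution distribution = new EmpiricalDistribution(2); // 2 bins
        distribution.load(data);
        List<SummaryStatistics> binStats = distribution.getBinStats();
        for (SummaryStatistics bin : binStats) {
            // Empty bins report N == 0 and NaN statistics
            System.out.printf("n=%d mean=%.2f stddev=%.4f%n",
                    bin.getN(), bin.getMean(), bin.getStandardDeviation());
        }
    }
}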

From source file:org.apache.solr.cloud.autoscaling.sim.TestLargeCluster.java

public void benchmarkNodeLost() throws Exception {
    List<String> results = new ArrayList<>();
    for (int wait : renard5x) {
        for (int delay : renard5x) {
            SummaryStatistics totalTime = new SummaryStatistics();
            SummaryStatistics ignoredOurEvents = new SummaryStatistics();
            SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
            SummaryStatistics startedOurEvents = new SummaryStatistics();
            SummaryStatistics startedOtherEvents = new SummaryStatistics();
            for (int i = 0; i < 5; i++) {
                if (cluster != null) {
                    cluster.close();
                }
                setupCluster();
                setUp();
                setupTest();
                long total = doTestNodeLost(wait, delay * 1000, 0);
                totalTime.addValue(total);
                // get event counts
                Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts();
                Map<String, AtomicInteger> map = counts.remove("node_lost_trigger");
                startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get());
                ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get());
                int otherStarted = 0;
                int otherIgnored = 0;
                for (Map<String, AtomicInteger> m : counts.values()) {
                    otherStarted += m.getOrDefault("STARTED", ZERO).get();
                    otherIgnored += m.getOrDefault("IGNORED", ZERO).get();
                }
                startedOtherEvents.addValue(otherStarted);
                ignoredOtherEvents.addValue(otherIgnored);
            }
            results.add(String.format(Locale.ROOT,
                    "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f", wait, delay,
                    startedOurEvents.getMean(), ignoredOurEvents.getMean(), startedOtherEvents.getMean(),
                    ignoredOtherEvents.getMean(), totalTime.getMin(), totalTime.getMax(), totalTime.getMean(),
                    totalTime.getStandardDeviation(), totalTime.getVariance()));
        }
    }
    log.info("===== RESULTS ======");
    log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
    results.forEach(s -> log.info(s));
}
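
The benchmark above shows a common pattern: feed one measurement per run into a SummaryStatistics and report mean and standard deviation at the end. A minimal standalone sketch of the same pattern (doWork() is a hypothetical placeholder for the code under test):

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class BenchmarkStats {
    public static void main(String[] args) {
        SummaryStatistics totalTime = new SummaryStatistics();
        for (int i = 0; i < 5; i++) {
            long start = System.nanoTime();
            doWork();
            totalTime.addValue((System.nanoTime() - start) / 1_000_000.0); // milliseconds
        }
        System.out.printf("min %.2f max %.2f mean %.2f stdev %.2f%n",
                totalTime.getMin(), totalTime.getMax(),
                totalTime.getMean(), totalTime.getStandardDeviation());
    }

    private static void doWork() {
        Math.sqrt(Math.random()); // stand-in workload
    }
}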

From source file:org.apache.tika.eval.AbstractProfiler.java

/**
 * Checks to see if metadata is null or content is empty (null or only whitespace).
 * If any of these, then this does no processing, and the fileId is not
 * entered into the content table.
 *
 * @param fileId
 * @param m
 * @param fieldName
 * @param contentsTable
 */
protected void writeContentData(String fileId, Metadata m, String fieldName, TableInfo contentsTable)
        throws IOException {
    if (m == null) {
        return;
    }
    Map<Cols, String> data = new HashMap<>();
    String content = getContent(m, maxContentLength, data);
    if (content == null || content.trim().length() == 0) {
        return;
    }
    tokenCounter.clear(fieldName);
    tokenCounter.add(fieldName, content);

    data.put(Cols.ID, fileId);
    data.put(Cols.CONTENT_LENGTH, Integer.toString(content.length()));
    langid(m, data);
    String langid = data.get(Cols.LANG_ID_1);
    langid = (langid == null) ? "" : langid;

    writeTokenCounts(data, fieldName, tokenCounter);
    CommonTokenResult commonTokenResult = null;
    try {
        commonTokenResult = commonTokenCountManager.countTokenOverlaps(langid,
                tokenCounter.getTokens(fieldName));
    } catch (IOException e) {
        LOG.error("{}", e.getMessage(), e);
    }
    if (commonTokenResult == null) {
        // the overlap count failed; skip the row rather than NPE below
        return;
    }
    data.put(Cols.COMMON_TOKENS_LANG, commonTokenResult.getLangCode());
    data.put(Cols.NUM_COMMON_TOKENS, Integer.toString(commonTokenResult.getCommonTokens()));
    TokenStatistics tokenStatistics = tokenCounter.getTokenStatistics(fieldName);
    data.put(Cols.NUM_UNIQUE_TOKENS, Integer.toString(tokenStatistics.getTotalUniqueTokens()));
    data.put(Cols.NUM_TOKENS, Integer.toString(tokenStatistics.getTotalTokens()));
    data.put(Cols.NUM_ALPHABETIC_TOKENS, Integer.toString(commonTokenResult.getAlphabeticTokens()));

    data.put(Cols.TOKEN_ENTROPY_RATE, Double.toString(tokenStatistics.getEntropy()));
    SummaryStatistics summStats = tokenStatistics.getSummaryStatistics();
    data.put(Cols.TOKEN_LENGTH_SUM, Integer.toString((int) summStats.getSum()));

    data.put(Cols.TOKEN_LENGTH_MEAN, Double.toString(summStats.getMean()));

    data.put(Cols.TOKEN_LENGTH_STD_DEV, Double.toString(summStats.getStandardDeviation()));
    unicodeBlocks(m, data);
    try {
        writer.writeRow(contentsTable, data);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.apache.tika.eval.tokens.TokenStatistics.java

@Override
public boolean equals(Object o) {

    if (this == o)
        return true;
    if (o == null || getClass() != o.getClass())
        return false;

    TokenStatistics that = (TokenStatistics) o;

    if (totalTokens != that.totalTokens)
        return false;
    if (totalUniqueTokens != that.totalUniqueTokens)
        return false;
    if (!doubleEquals(that.entropy, entropy))
        return false;
    // Probably incorrect - comparing Object[] arrays with Arrays.equals
    if (!Arrays.equals(topN, that.topN))
        return false;

    SummaryStatistics thatS = ((TokenStatistics) o).summaryStatistics;
    if (summaryStatistics.getN() != thatS.getN())
        return false;

    //if both have n==0, don't bother with the stats
    if (summaryStatistics.getN() == 0L)
        return true;
    //TODO: consider adding others...
    if (!doubleEquals(summaryStatistics.getGeometricMean(), thatS.getGeometricMean()))
        return false;
    if (!doubleEquals(summaryStatistics.getMax(), thatS.getMax()))
        return false;
    if (!doubleEquals(summaryStatistics.getMean(), thatS.getMean()))
        return false;
    if (!doubleEquals(summaryStatistics.getMin(), thatS.getMin()))
        return false;
    if (!doubleEquals(summaryStatistics.getSum(), thatS.getSum()))
        return false;
    if (!doubleEquals(summaryStatistics.getStandardDeviation(), thatS.getStandardDeviation()))
        return false;
    return true;
}
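
The doubleEquals helper used above is not part of this snippet. A plausible sketch of it (hypothetical; the actual Tika implementation may differ) compares the two doubles within a small absolute tolerance:

// Hypothetical helper, shown for completeness; not from the listed source file.
private static boolean doubleEquals(double a, double b) {
    return doubleEquals(a, b, 0.000000000001);
}

private static boolean doubleEquals(double a, double b, double epsilon) {
    return Math.abs(a - b) < epsilon;
}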

From source file:org.calrissian.accumulorecipes.metricsstore.ext.stats.impl.AccumuloStatsMetricStoreTest.java

@Test
public void testStatisticAccuracy() throws Exception {
    AccumuloStatsMetricStore metricStore = new AccumuloStatsMetricStore(getConnector());

    Random random = new Random();

    List<Long> sampleData = asList((long) random.nextInt(10000), (long) random.nextInt(10000),
            (long) random.nextInt(10000), (long) random.nextInt(10000), (long) random.nextInt(10000));

    // use Commons Math SummaryStatistics as a reference to verify the store's accuracy
    SummaryStatistics sumStats = new SummaryStatistics();
    for (long num : sampleData)
        sumStats.addValue(num);

    final long timestamp = System.currentTimeMillis();
    Iterable<Metric> testData = transform(sampleData, new Function<Long, Metric>() {
        @Override
        public Metric apply(Long num) {
            return new Metric(timestamp, "group", "type", "name", "", num);
        }
    });

    metricStore.save(testData);

    List<Stats> stats = newArrayList(metricStore.queryStats(new Date(0), new Date(), "group", "type", "name",
            MetricTimeUnit.MINUTES, new Auths()));

    assertEquals(1, stats.size());
    Stats stat = stats.get(0);

    assertEquals(sumStats.getMin(), stat.getMin(), Double.MIN_NORMAL);
    assertEquals(sumStats.getMax(), stat.getMax(), Double.MIN_NORMAL);
    assertEquals(sumStats.getSum(), stat.getSum(), Double.MIN_NORMAL);
    assertEquals(sumStats.getN(), stat.getCount(), Double.MIN_NORMAL);
    assertEquals(sumStats.getMean(), stat.getMean(), Double.MIN_NORMAL);
    assertEquals(sumStats.getPopulationVariance(), stat.getVariance(), 0.00000001);
    assertEquals(sumStats.getVariance(), stat.getVariance(true), 0.00000001);
    assertEquals(sqrt(sumStats.getPopulationVariance()), stat.getStdDev(), 0.00000001);
    assertEquals(sumStats.getStandardDeviation(), stat.getStdDev(true), 0.00000001);
}
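
Note the distinction the test draws: getVariance() and getStandardDeviation() are bias-corrected sample statistics (denominator n - 1), while getPopulationVariance() divides by n. A minimal sketch showing the difference:

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class SampleVsPopulation {
    public static void main(String[] args) {
        SummaryStatistics s = new SummaryStatistics();
        for (double v : new double[] { 1, 2, 3, 4, 5 }) {
            s.addValue(v);
        }
        // Sample standard deviation: sqrt(sum((x - mean)^2) / (n - 1)) = sqrt(2.5)
        System.out.println("sample stddev     = " + s.getStandardDeviation());
        // Population standard deviation: sqrt(sum((x - mean)^2) / n) = sqrt(2)
        System.out.println("population stddev = " + Math.sqrt(s.getPopulationVariance()));
    }
}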

From source file:org.cloudsimplus.sla.responsetime.CloudletResponseTimeMinimizationRunner.java

private void showConfidenceInterval(SummaryStatistics stats) {
    // Calculate 95% confidence interval
    double intervalSize = computeConfidenceErrorMargin(stats, 0.95);
    double lower = stats.getMean() - intervalSize;
    double upper = stats.getMean() + intervalSize;
    System.out.printf("\tResponse time mean 95%% Confidence Interval: %.2f  %.2f, that is [%.2f to %.2f]\n",
            stats.getMean(), intervalSize, lower, upper);
    System.out.printf("\tStandard Deviation: %.2f \n", stats.getStandardDeviation());
}

From source file:org.cloudsimplus.testbeds.ExperimentRunner.java

/**
 * <p>
 * Computes the confidence interval error margin for a given set of samples
 * in order to enable finding the interval lower and upper bound around a
 * mean value. By this way, the confidence interval can be computed as [mean
 * + errorMargin .. mean - errorMargin].
 * </p>
 *
 * <p>
 * To reduce the confidence interval by half, one has to execute 4 times
 * as many experiments (the error margin shrinks with the square root of
 * the sample count). This is called the "Replication Method" and works
 * only when the samples are i.i.d. (independent and identically
 * distributed). Thus, if there is correlation between the samples of each
 * simulation run, a different method, such as bias compensation,
 * {@link #isApplyBatchMeansMethod() batch means} or the regenerative
 * method, has to be used. </p>
 *
 * <b>NOTE:</b> How to compute the error margin is a little confusing.
 * Harry Perros' book states that if fewer than 30 samples are collected,
 * the t-Distribution has to be used for that purpose.
 *
 * However, the
 * <a href="https://en.wikipedia.org/wiki/Confidence_interval#Basic_Steps">Wikipedia
 * article</a>
 * says that if the standard deviation of the real population is known, the
 * z-value from the Standard Normal Distribution has to be used; otherwise,
 * the t-value from the t-Distribution has to be used to calculate the
 * critical value that defines the error margin (also called the standard
 * error). The book "Numeric Computation and Statistical Data Analysis on
 * the Java Platform" confirms the latter statement, and that approach was
 * followed here.
 *
 * @param stats the statistic object with the values to compute the error
 * margin of the confidence interval
 * @param confidenceLevel the confidence level, in the open interval
 * (0, 1), such as 0.95 to indicate a 95% confidence level.
 * @return the error margin to compute the lower and upper bound of the
 * confidence interval
 *
 * @see
 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3672.htm">Critical
 * Values of the Student's t Distribution</a>
 * @see
 * <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">t-Distribution</a>
 * @see <a href="http://www4.ncsu.edu/~hp/files/simulation.pdf">Harry
 * Perros, "Computer Simulation Techniques: The definitive introduction!,"
 * 2009</a>
 * @see <a href="http://www.springer.com/gp/book/9783319285290">Numeric
 * Computation and Statistical Data Analysis on the Java Platform</a>
 */
protected double computeConfidenceErrorMargin(SummaryStatistics stats, double confidenceLevel) {
    try {
        // Creates a T-Distribution with N-1 degrees of freedom
        final double degreesOfFreedom = stats.getN() - 1;

        /*
        The t-Distribution is used to determine the probability that
        the real population mean lies in a given interval.
         */
        TDistribution tDist = new TDistribution(degreesOfFreedom);
        final double significance = 1.0 - confidenceLevel;
        final double criticalValue = tDist.inverseCumulativeProbability(1.0 - significance / 2.0);
        System.out.printf("\n\tt-Distribution critical value for %d samples: %f\n", stats.getN(),
                criticalValue);

        // Calculates the confidence interval error margin
        return criticalValue * stats.getStandardDeviation() / Math.sqrt(stats.getN());
    } catch (MathIllegalArgumentException e) {
        return Double.NaN;
    }
}
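
Putting the pieces together, here is a self-contained sketch (sample values are illustrative; only commons-math3 is assumed) that computes a 95% confidence interval for the mean from raw samples, using the same error-margin formula as the method above:

import org.apache.commons.math3.distribution.TDistribution;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class ConfidenceIntervalDemo {
    public static void main(String[] args) {
        SummaryStatistics stats = new SummaryStatistics();
        for (double v : new double[] { 10.2, 9.8, 10.5, 10.1, 9.7, 10.3 }) {
            stats.addValue(v);
        }
        double confidenceLevel = 0.95;
        // t-Distribution with n - 1 degrees of freedom
        TDistribution tDist = new TDistribution(stats.getN() - 1);
        double criticalValue = tDist.inverseCumulativeProbability(1.0 - (1.0 - confidenceLevel) / 2.0);
        // Error margin: t * s / sqrt(n), with s the sample standard deviation
        double errorMargin = criticalValue * stats.getStandardDeviation() / Math.sqrt(stats.getN());
        System.out.printf("mean %.3f, 95%% CI [%.3f, %.3f]%n",
                stats.getMean(), stats.getMean() - errorMargin, stats.getMean() + errorMargin);
    }
}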