List of usage examples for org.apache.commons.math3.stat.descriptive SummaryStatistics getStandardDeviation
public double getStandardDeviation()
From source file:org.apache.cassandra.dht.tokenallocator.ReplicationAwareTokenAllocatorTest.java
/**
 * Renders the given statistics as a short "max/min/stddev" summary string
 * for log output.
 */
private static String mms(SummaryStatistics s)
{
    final double max = s.getMax();
    final double min = s.getMin();
    final double stddev = s.getStandardDeviation();
    return String.format("max %.2f min %.2f stddev %.4f", max, min, stddev);
}
From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java
public static Collection<Token> allocateTokens(final TokenMetadata tokenMetadata, final AbstractReplicationStrategy rs, final InetAddress endpoint, int numTokens) { StrategyAdapter strategy = getStrategy(tokenMetadata, rs, endpoint); Collection<Token> tokens = create(tokenMetadata, strategy).addUnit(endpoint, numTokens); tokens = adjustForCrossDatacenterClashes(tokenMetadata, strategy, tokens); if (logger.isWarnEnabled()) { logger.warn("Selected tokens {}", tokens); SummaryStatistics os = replicatedOwnershipStats(tokenMetadata, rs, endpoint); TokenMetadata tokenMetadataCopy = tokenMetadata.cloneOnlyTokenMap(); tokenMetadataCopy.updateNormalTokens(tokens, endpoint); SummaryStatistics ns = replicatedOwnershipStats(tokenMetadataCopy, rs, endpoint); logger.warn("Replicated node load in datacentre before allocation {}", statToString(os)); logger.warn("Replicated node load in datacentre after allocation {}", statToString(ns)); // TODO: Is it worth doing the replicated ownership calculation always to be able to raise this alarm? if (ns.getStandardDeviation() > os.getStandardDeviation()) logger.warn("Unexpected growth in standard deviation after allocation."); }/*from w w w . ja v a 2 s. c om*/ return tokens; }
From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java
/**
 * Formats ownership statistics for logging: max and min are normalized by
 * the mean, while the standard deviation is reported raw.
 */
public static String statToString(SummaryStatistics stat)
{
    final double mean = stat.getMean();
    return String.format("max %.2f min %.2f stddev %.4f",
                         stat.getMax() / mean,
                         stat.getMin() / mean,
                         stat.getStandardDeviation());
}
From source file:org.apache.solr.client.solrj.io.eval.HistogramEvaluator.java
@Override public Object doWork(Object... values) throws IOException { if (Arrays.stream(values).anyMatch(item -> null == item)) { return null; }//from w w w . j a v a2 s .c o m List<?> sourceValues; Integer bins = 10; if (values.length >= 1) { sourceValues = values[0] instanceof List<?> ? (List<?>) values[0] : Arrays.asList(values[0]); if (values.length >= 2) { if (values[1] instanceof Number) { bins = ((Number) values[1]).intValue(); } else { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - if second parameter is provided then it must be a valid number but found %s instead", toExpression(constructingFactory), values[1].getClass().getSimpleName())); } } } else { throw new IOException( String.format(Locale.ROOT, "Invalid expression %s - expecting at least one value but found %d", toExpression(constructingFactory), containedEvaluators.size())); } EmpiricalDistribution distribution = new EmpiricalDistribution(bins); distribution.load( ((List<?>) sourceValues).stream().mapToDouble(value -> ((Number) value).doubleValue()).toArray()); ; List<Tuple> histogramBins = new ArrayList<>(); for (SummaryStatistics binSummary : distribution.getBinStats()) { Map<String, Number> map = new HashMap<>(); map.put("max", binSummary.getMax()); map.put("mean", binSummary.getMean()); map.put("min", binSummary.getMin()); map.put("stdev", binSummary.getStandardDeviation()); map.put("sum", binSummary.getSum()); map.put("N", binSummary.getN()); map.put("var", binSummary.getVariance()); map.put("cumProb", distribution.cumulativeProbability(binSummary.getMean())); map.put("prob", distribution.probability(binSummary.getMin(), binSummary.getMax())); histogramBins.add(new Tuple(map)); } return histogramBins; }
From source file:org.apache.solr.cloud.autoscaling.sim.TestLargeCluster.java
public void benchmarkNodeLost() throws Exception { List<String> results = new ArrayList<>(); for (int wait : renard5x) { for (int delay : renard5x) { SummaryStatistics totalTime = new SummaryStatistics(); SummaryStatistics ignoredOurEvents = new SummaryStatistics(); SummaryStatistics ignoredOtherEvents = new SummaryStatistics(); SummaryStatistics startedOurEvents = new SummaryStatistics(); SummaryStatistics startedOtherEvents = new SummaryStatistics(); for (int i = 0; i < 5; i++) { if (cluster != null) { cluster.close();//from w ww .j a v a2s. c o m } setupCluster(); setUp(); setupTest(); long total = doTestNodeLost(wait, delay * 1000, 0); totalTime.addValue(total); // get event counts Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts(); Map<String, AtomicInteger> map = counts.remove("node_lost_trigger"); startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get()); ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get()); int otherStarted = 0; int otherIgnored = 0; for (Map<String, AtomicInteger> m : counts.values()) { otherStarted += m.getOrDefault("STARTED", ZERO).get(); otherIgnored += m.getOrDefault("IGNORED", ZERO).get(); } startedOtherEvents.addValue(otherStarted); ignoredOtherEvents.addValue(otherIgnored); } results.add(String.format(Locale.ROOT, "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f", wait, delay, startedOurEvents.getMean(), ignoredOurEvents.getMean(), startedOtherEvents.getMean(), ignoredOtherEvents.getMean(), totalTime.getMin(), totalTime.getMax(), totalTime.getMean(), totalTime.getStandardDeviation(), totalTime.getVariance())); } } log.info("===== RESULTS ======"); log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar"); results.forEach(s -> log.info(s)); }
From source file:org.apache.tika.eval.AbstractProfiler.java
/**
 * Writes per-file content statistics (length, language id, token counts,
 * common-token overlap, token length stats, entropy) into the contents table.
 * <p>
 * Checks to see if metadata is null or content is empty (null or only whitespace).
 * If any of these, then this does no processing, and the fileId is not
 * entered into the content table.
 *
 * @param fileId        id of the file being profiled
 * @param m             extracted metadata (may be null; then nothing is written)
 * @param fieldName     token-counter field to accumulate into
 * @param contentsTable destination table
 * @throws IOException if tokenizing the content fails
 */
protected void writeContentData(String fileId, Metadata m, String fieldName, TableInfo contentsTable)
        throws IOException {
    if (m == null) {
        return;
    }
    Map<Cols, String> data = new HashMap<>();
    String content = getContent(m, maxContentLength, data);
    if (content == null || content.trim().length() == 0) {
        return;
    }
    tokenCounter.clear(fieldName);
    tokenCounter.add(fieldName, content);
    data.put(Cols.ID, fileId);
    data.put(Cols.CONTENT_LENGTH, Integer.toString(content.length()));
    langid(m, data);
    String langid = data.get(Cols.LANG_ID_1);
    langid = (langid == null) ? "" : langid;
    writeTokenCounts(data, fieldName, tokenCounter);
    CommonTokenResult commonTokenResult = null;
    try {
        commonTokenResult = commonTokenCountManager.countTokenOverlaps(langid,
                tokenCounter.getTokens(fieldName));
    } catch (IOException e) {
        LOG.error("{}", e.getMessage(), e);
    }
    // BUGFIX: previously this fell through to a guaranteed NullPointerException
    // on commonTokenResult.getLangCode() whenever countTokenOverlaps threw.
    // Without the overlap result most columns of the row cannot be computed,
    // so skip the row for this file instead of crashing the whole run.
    if (commonTokenResult == null) {
        return;
    }
    data.put(Cols.COMMON_TOKENS_LANG, commonTokenResult.getLangCode());
    data.put(Cols.NUM_COMMON_TOKENS, Integer.toString(commonTokenResult.getCommonTokens()));
    TokenStatistics tokenStatistics = tokenCounter.getTokenStatistics(fieldName);
    data.put(Cols.NUM_UNIQUE_TOKENS, Integer.toString(tokenStatistics.getTotalUniqueTokens()));
    data.put(Cols.NUM_TOKENS, Integer.toString(tokenStatistics.getTotalTokens()));
    data.put(Cols.NUM_ALPHABETIC_TOKENS, Integer.toString(commonTokenResult.getAlphabeticTokens()));
    data.put(Cols.TOKEN_ENTROPY_RATE, Double.toString(tokenStatistics.getEntropy()));
    SummaryStatistics summStats = tokenStatistics.getSummaryStatistics();
    data.put(Cols.TOKEN_LENGTH_SUM, Integer.toString((int) summStats.getSum()));
    data.put(Cols.TOKEN_LENGTH_MEAN, Double.toString(summStats.getMean()));
    data.put(Cols.TOKEN_LENGTH_STD_DEV, Double.toString(summStats.getStandardDeviation()));
    unicodeBlocks(m, data);
    try {
        writer.writeRow(contentsTable, data);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.tika.eval.tokens.TokenStatistics.java
@Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; TokenStatistics that = (TokenStatistics) o; if (totalTokens != that.totalTokens) return false; if (totalUniqueTokens != that.totalUniqueTokens) return false; if (!doubleEquals(that.entropy, entropy)) return false; // Probably incorrect - comparing Object[] arrays with Arrays.equals if (!Arrays.equals(topN, that.topN)) return false; SummaryStatistics thatS = ((TokenStatistics) o).summaryStatistics; if (summaryStatistics.getN() != thatS.getN()) return false; //if both have n==0, don't bother with the stats if (summaryStatistics.getN() == 0L) return true; //TODO: consider adding others... if (!doubleEquals(summaryStatistics.getGeometricMean(), thatS.getGeometricMean())) return false; if (!doubleEquals(summaryStatistics.getMax(), thatS.getMax())) return false; if (!doubleEquals(summaryStatistics.getMean(), thatS.getMean())) return false; if (!doubleEquals(summaryStatistics.getMin(), thatS.getMin())) return false; if (!doubleEquals(summaryStatistics.getSum(), thatS.getSum())) return false; if (!doubleEquals(summaryStatistics.getStandardDeviation(), thatS.getStandardDeviation())) return false; return true;/* www. j a va 2 s . c om*/ }
From source file:org.calrissian.accumulorecipes.metricsstore.ext.stats.impl.AccumuloStatsMetricStoreTest.java
@Test public void testStatisticAccuracy() throws Exception { AccumuloStatsMetricStore metricStore = new AccumuloStatsMetricStore(getConnector()); Random random = new Random(); List<Long> sampleData = asList((long) random.nextInt(10000), (long) random.nextInt(10000), (long) random.nextInt(10000), (long) random.nextInt(10000), (long) random.nextInt(10000)); //use commons math as a SummaryStatistics sumStats = new SummaryStatistics(); for (long num : sampleData) sumStats.addValue(num);//from ww w . ja va 2 s .c om final long timestamp = System.currentTimeMillis(); Iterable<Metric> testData = transform(sampleData, new Function<Long, Metric>() { @Override public Metric apply(Long num) { return new Metric(timestamp, "group", "type", "name", "", num); } }); metricStore.save(testData); List<Stats> stats = newArrayList(metricStore.queryStats(new Date(0), new Date(), "group", "type", "name", MetricTimeUnit.MINUTES, new Auths())); assertEquals(1, stats.size()); Stats stat = stats.get(0); assertEquals(sumStats.getMin(), stat.getMin(), Double.MIN_NORMAL); assertEquals(sumStats.getMax(), stat.getMax(), Double.MIN_NORMAL); assertEquals(sumStats.getSum(), stat.getSum(), Double.MIN_NORMAL); assertEquals(sumStats.getN(), stat.getCount(), Double.MIN_NORMAL); assertEquals(sumStats.getMean(), stat.getMean(), Double.MIN_NORMAL); assertEquals(sumStats.getPopulationVariance(), stat.getVariance(), 0.00000001); assertEquals(sumStats.getVariance(), stat.getVariance(true), 0.00000001); assertEquals(sqrt(sumStats.getPopulationVariance()), stat.getStdDev(), 0.00000001); assertEquals(sumStats.getStandardDeviation(), stat.getStdDev(true), 0.00000001); }
From source file:org.cloudsimplus.sla.responsetime.CloudletResponseTimeMinimizationRunner.java
private void showConfidenceInterval(SummaryStatistics stats) { // Calculate 95% confidence interval double intervalSize = computeConfidenceErrorMargin(stats, 0.95); double lower = stats.getMean() - intervalSize; double upper = stats.getMean() + intervalSize; System.out.printf("\tResponse time mean 95%% Confidence Interval: %.2f %.2f, that is [%.2f to %.2f]\n", stats.getMean(), intervalSize, lower, upper); System.out.printf("\tStandard Deviation: %.2f \n", stats.getStandardDeviation()); }
From source file:org.cloudsimplus.testbeds.ExperimentRunner.java
/** * <p>/* w w w. jav a 2 s.co m*/ * Computes the confidence interval error margin for a given set of samples * in order to enable finding the interval lower and upper bound around a * mean value. By this way, the confidence interval can be computed as [mean * + errorMargin .. mean - errorMargin]. * </p> * * <p> * To reduce the confidence interval by half, one have to execute the * experiments 4 more times. This is called the "Replication Method" and * just works when the samples are i.i.d. (independent and identically * distributed). Thus, if you have correlation between samples of each * simulation run, a different method such as a bias compensation, * {@link #isApplyBatchMeansMethod() batch means} or regenerative method has * to be used. </p> * * <b>NOTE:</b> How to compute the error margin is a little bit confusing. * The Harry Perros' book states that if less than 30 samples are collected, * the t-Distribution has to be used to that purpose. * * However, this article * <a href="https://en.wikipedia.org/wiki/Confidence_interval#Basic_Steps">Wikipedia * article</a> * says that if the standard deviation of the real population is known, it * has to be used the z-value from the Standard Normal Distribution. * Otherwise, it has to be used the t-value from the t-Distribution to * calculate the critical value for defining the error margin (also called * standard error). The book "Numeric Computation and Statistical Data * Analysis on the Java Platform" confirms the last statement and such * approach was followed. * * @param stats the statistic object with the values to compute the error * margin of the confidence interval * @param confidenceLevel the confidence level, in the interval from ]0 to * 1[, such as 0.95 to indicate 95% of confidence. 
* @return the error margin to compute the lower and upper bound of the * confidence interval * * @see * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3672.htm">Critical * Values of the Student's t Distribution</a> * @see * <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">t-Distribution</a> * @see <a href="http://www4.ncsu.edu/~hp/files/simulation.pdf">Harry * Perros, "Computer Simulation Techniques: The definitive introduction!," * 2009</a> * @see <a href="http://www.springer.com/gp/book/9783319285290">Numeric * Computation and Statistical Data Analysis on the Java Platform</a> */ protected double computeConfidenceErrorMargin(SummaryStatistics stats, double confidenceLevel) { try { // Creates a T-Distribution with N-1 degrees of freedom final double degreesOfFreedom = stats.getN() - 1; /* The t-Distribution is used to determine the probability that the real population mean lies in a given interval. */ TDistribution tDist = new TDistribution(degreesOfFreedom); final double significance = 1.0 - confidenceLevel; final double criticalValue = tDist.inverseCumulativeProbability(1.0 - significance / 2.0); System.out.printf("\n\tt-Distribution critical value for %d samples: %f\n", stats.getN(), criticalValue); // Calculates the confidence interval error margin return criticalValue * stats.getStandardDeviation() / Math.sqrt(stats.getN()); } catch (MathIllegalArgumentException e) { return Double.NaN; } }