Usage examples for the no-argument constructor of org.apache.commons.math3.stat.descriptive.SummaryStatistics
public SummaryStatistics()
From source file:org.apache.cassandra.dht.tokenallocator.ReplicationAwareTokenAllocatorTest.java
private void updateSummary(ReplicationAwareTokenAllocator<Unit> t, Summary su, Summary st, boolean print) { int size = t.sortedTokens.size(); double inverseAverage = 1.0 * size / t.strategy.replicas(); Map<Unit, Double> ownership = evaluateReplicatedOwnership(t); SummaryStatistics unitStat = new SummaryStatistics(); for (Map.Entry<Unit, Double> en : ownership.entrySet()) unitStat.addValue(en.getValue() * inverseAverage / t.unitToTokens.get(en.getKey()).size()); su.update(unitStat);//w w w . ja v a2s . c om SummaryStatistics tokenStat = new SummaryStatistics(); for (Token tok : t.sortedTokens.keySet()) tokenStat.addValue(replicatedTokenOwnership(tok, t.sortedTokens, t.strategy) * inverseAverage); st.update(tokenStat); if (print) { System.out.format("Size %d(%d) \tunit %s token %s %s\n", t.unitCount(), size, mms(unitStat), mms(tokenStat), t.strategy); System.out.format("Worst intermediate unit\t%s token %s\n", su, st); } }
From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java
public static SummaryStatistics replicatedOwnershipStats(TokenMetadata tokenMetadata, AbstractReplicationStrategy rs, InetAddress endpoint) { SummaryStatistics stat = new SummaryStatistics(); StrategyAdapter strategy = getStrategy(tokenMetadata, rs, endpoint); for (Map.Entry<InetAddress, Double> en : evaluateReplicatedOwnership(tokenMetadata, rs).entrySet()) { // Filter only in the same datacentre. if (strategy.inAllocationRing(en.getKey())) stat.addValue(en.getValue() / tokenMetadata.getTokens(en.getKey()).size()); }/*from w w w. j a va 2s . c om*/ return stat; }
From source file:org.apache.metron.common.dsl.functions.StellarStatistics.java
/**
 * Creates the statistics holder, initializing exactly one of the two backing implementations.
 *
 * @param windowSize The number of input data elements to maintain in memory. If
 *                   windowSize == 0, then no data elements will be maintained in
 *                   memory.
 */
public StellarStatistics(int windowSize) {
    // only one of the underlying implementation classes will be used at a time
    if (windowSize <= 0) {
        // no window requested: use the summary implementation
        summStats = new SummaryStatistics();
    } else {
        // positive window: use the descriptive implementation with that window size
        descStats = new DescriptiveStatistics(windowSize);
    }
}
From source file:org.apache.metron.common.math.stats.OnlineStatisticsProviderTest.java
private void validateEquality(Iterable<Double> values) { DescriptiveStatistics stats = new DescriptiveStatistics(); SummaryStatistics summaryStats = new SummaryStatistics(); OnlineStatisticsProvider statsProvider = new OnlineStatisticsProvider(); //Test that the aggregated provider gives the same results as the provider that is shown all the data. List<OnlineStatisticsProvider> providers = new ArrayList<>(); for (int i = 0; i < 10; ++i) { providers.add(new OnlineStatisticsProvider()); }/*from w w w . ja v a 2 s . com*/ int i = 0; for (double d : values) { i++; stats.addValue(d); summaryStats.addValue(d); providers.get(i % providers.size()).addValue(d); statsProvider.addValue(d); } StatisticsProvider aggregatedProvider = providers.get(0); for (int j = 1; j < providers.size(); ++j) { aggregatedProvider = aggregatedProvider.merge(providers.get(j)); } validateStatisticsProvider(statsProvider, summaryStats, stats); validateStatisticsProvider(aggregatedProvider, summaryStats, stats); }
From source file:org.apache.metron.common.stellar.StellarStatisticsFunctionsTest.java
@Before public void setup() { variables = new HashMap<>(); // test input data values = Arrays.asList(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0); // the DescriptiveStatistics is used for validation stats = new DescriptiveStatistics(1000); values.stream().forEach(val -> stats.addValue(val)); // the StatisticalSummary is used for validation summaryStats = new SummaryStatistics(); values.stream().forEach(val -> summaryStats.addValue(val)); }
From source file:org.apache.metron.statistics.StellarStatisticsFunctionsTest.java
@Test public void testMergeProviders() throws Exception { List<StatisticsProvider> providers = new ArrayList<>(); /*/*from www.j a v a 2 s . c o m*/ Create 10 providers, each with a sample drawn from a gaussian distribution. Update the reference stats from commons math to ensure we are */ GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(1L)); SummaryStatistics sStatistics = new SummaryStatistics(); DescriptiveStatistics dStatistics = new DescriptiveStatistics(); for (int i = 0; i < 10; ++i) { List<Double> sample = new ArrayList<>(); for (int j = 0; j < 100; ++j) { double s = gaussian.nextNormalizedDouble(); sample.add(s); sStatistics.addValue(s); dStatistics.addValue(s); } StatisticsProvider provider = (StatisticsProvider) run( "STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(sample) + ")", new HashMap<>()); providers.add(provider); } /* Merge the providers and validate */ Map<String, Object> providerVariables = new HashMap<>(); for (int i = 0; i < providers.size(); ++i) { providerVariables.put("provider_" + i, providers.get(i)); } StatisticsProvider mergedProvider = (StatisticsProvider) run( "STATS_MERGE([" + Joiner.on(",").join(providerVariables.keySet()) + "])", providerVariables); OnlineStatisticsProviderTest.validateStatisticsProvider(mergedProvider, sStatistics, dStatistics); }
From source file:org.apache.solr.cloud.autoscaling.sim.TestLargeCluster.java
public void benchmarkNodeLost() throws Exception { List<String> results = new ArrayList<>(); for (int wait : renard5x) { for (int delay : renard5x) { SummaryStatistics totalTime = new SummaryStatistics(); SummaryStatistics ignoredOurEvents = new SummaryStatistics(); SummaryStatistics ignoredOtherEvents = new SummaryStatistics(); SummaryStatistics startedOurEvents = new SummaryStatistics(); SummaryStatistics startedOtherEvents = new SummaryStatistics(); for (int i = 0; i < 5; i++) { if (cluster != null) { cluster.close();//from w w w . j a va2s.co m } setupCluster(); setUp(); setupTest(); long total = doTestNodeLost(wait, delay * 1000, 0); totalTime.addValue(total); // get event counts Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts(); Map<String, AtomicInteger> map = counts.remove("node_lost_trigger"); startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get()); ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get()); int otherStarted = 0; int otherIgnored = 0; for (Map<String, AtomicInteger> m : counts.values()) { otherStarted += m.getOrDefault("STARTED", ZERO).get(); otherIgnored += m.getOrDefault("IGNORED", ZERO).get(); } startedOtherEvents.addValue(otherStarted); ignoredOtherEvents.addValue(otherIgnored); } results.add(String.format(Locale.ROOT, "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f", wait, delay, startedOurEvents.getMean(), ignoredOurEvents.getMean(), startedOtherEvents.getMean(), ignoredOtherEvents.getMean(), totalTime.getMin(), totalTime.getMax(), totalTime.getMean(), totalTime.getStandardDeviation(), totalTime.getVariance())); } } log.info("===== RESULTS ======"); log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar"); results.forEach(s -> log.info(s)); }
From source file:org.apache.tika.eval.tokens.LuceneTokenCounter.java
void count(String field) throws IOException { long tokenCount = leafReader.getSumTotalTermFreq(field); if (tokenCount > Integer.MAX_VALUE) { throw new IllegalArgumentException("can't handle longs"); }//w ww . ja v a 2 s . c o m int tokenCountInt = (int) tokenCount; int uniqueTokenCount = 0; SummaryStatistics summStats = new SummaryStatistics(); double ent = 0.0d; double p = 0.0d; double base = 2.0; Terms terms = leafReader.terms(field); if (terms == null) { //if there were no terms fieldStats.put(field, new TokenStatistics(uniqueTokenCount, tokenCountInt, new TokenIntPair[0], ent, summStats)); return; } TermsEnum termsEnum = terms.iterator(); BytesRef bytesRef = termsEnum.next(); TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN); while (bytesRef != null) { long termFreq = termsEnum.totalTermFreq(); if (termFreq > Integer.MAX_VALUE) { throw new IllegalArgumentException("Sorry can't handle longs yet"); } int tf = (int) termFreq; //TODO: figure out how to avoid Stringifying this //to get codepoint count String t = bytesRef.utf8ToString(); int len = t.codePointCount(0, t.length()); for (int i = 0; i < tf; i++) { summStats.addValue(len); } p = (double) tf / (double) tokenCount; ent += p * FastMath.log(base, p); if (queue.top() == null || queue.size() < topN || tf >= queue.top().getValue()) { queue.insertWithOverflow(new TokenIntPair(t, tf)); } uniqueTokenCount++; bytesRef = termsEnum.next(); } if (tokenCountInt > 0) { ent = (-1.0d / (double) tokenCountInt) * ent; } fieldStats.put(field, new TokenStatistics(uniqueTokenCount, tokenCountInt, queue.getArray(), ent, summStats)); }
From source file:org.apache.tika.eval.tokens.TokenCounter.java
private void _add(String field, Analyzer analyzer, String content) throws IOException { int totalTokens = 0; TokenStream ts = analyzer.tokenStream(field, content); CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); ts.reset();// w w w . j a v a2 s .c om Map<String, MutableInt> tokenMap = map.get(field); if (tokenMap == null) { tokenMap = new HashMap<>(); map.put(field, tokenMap); } while (ts.incrementToken()) { String token = termAtt.toString(); MutableInt cnt = tokenMap.get(token); if (cnt == null) { cnt = new MutableInt(1); tokenMap.put(token, cnt); } else { cnt.increment(); } totalTokens++; } ts.close(); ts.end(); int totalUniqueTokens = tokenMap.size(); double ent = 0.0d; double p = 0.0d; double base = 2.0; TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN); SummaryStatistics summaryStatistics = new SummaryStatistics(); for (Map.Entry<String, MutableInt> e : tokenMap.entrySet()) { String token = e.getKey(); int termFreq = e.getValue().intValue(); p = (double) termFreq / (double) totalTokens; ent += p * FastMath.log(base, p); int len = token.codePointCount(0, token.length()); for (int i = 0; i < e.getValue().intValue(); i++) { summaryStatistics.addValue(len); } if (queue.top() == null || queue.size() < topN || termFreq >= queue.top().getValue()) { queue.insertWithOverflow(new TokenIntPair(token, termFreq)); } } if (totalTokens > 0) { ent = (-1.0d / (double) totalTokens) * ent; } /* Collections.sort(allTokens); List<TokenIntPair> topNList = new ArrayList<>(topN); for (int i = 0; i < topN && i < allTokens.size(); i++) { topNList.add(allTokens.get(i)); }*/ tokenStatistics.put(field, new TokenStatistics(totalUniqueTokens, totalTokens, queue.getArray(), ent, summaryStatistics)); }
From source file:org.bml.util.rt.telemetry.track.AtomicIntegerTrack.java
private void initTrack(TRACK_TYPE trackType) { switch (trackType) { case DESCRIPTIVE: dStats = new SynchronizedDescriptiveStatistics(); break;//from www .j ava 2 s . c o m case SUMMARY: sStats = new SummaryStatistics(); break; } frequency = new Frequency(); }