Example usage for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()

List of usage examples for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()

Introduction

On this page you can find example usages of the no-argument constructor SummaryStatistics() of the class org.apache.commons.math3.stat.descriptive.SummaryStatistics.

Prototype

public SummaryStatistics() 

Source Link

Document

Construct a SummaryStatistics instance

Usage

From source file:org.apache.cassandra.dht.tokenallocator.ReplicationAwareTokenAllocatorTest.java

/**
 * Gathers per-unit and per-token ownership statistics for the given allocator, passes
 * them to the two running summaries, and optionally prints them.
 *
 * <p>Every ownership value is multiplied by {@code tokenCount / replicas} before being
 * recorded; per-unit values are additionally divided by the number of tokens the unit holds.
 *
 * @param t     allocator whose current token layout is evaluated
 * @param su    running summary that receives the per-unit statistics
 * @param st    running summary that receives the per-token statistics
 * @param print when {@code true}, the current statistics and both running summaries are
 *              written to standard output
 */
private void updateSummary(ReplicationAwareTokenAllocator<Unit> t, Summary su, Summary st, boolean print) {
    final int tokenCount = t.sortedTokens.size();
    final double scale = 1.0 * tokenCount / t.strategy.replicas();

    // Per-unit statistics: replicated ownership scaled and divided by the unit's token count.
    final SummaryStatistics unitStat = new SummaryStatistics();
    for (Map.Entry<Unit, Double> entry : evaluateReplicatedOwnership(t).entrySet()) {
        final double owned = entry.getValue();
        final int unitTokens = t.unitToTokens.get(entry.getKey()).size();
        unitStat.addValue(owned * scale / unitTokens);
    }
    su.update(unitStat);

    // Per-token statistics: replicated ownership of each token, scaled the same way.
    final SummaryStatistics tokenStat = new SummaryStatistics();
    for (Token token : t.sortedTokens.keySet()) {
        final double owned = replicatedTokenOwnership(token, t.sortedTokens, t.strategy);
        tokenStat.addValue(owned * scale);
    }
    st.update(tokenStat);

    if (!print) {
        return;
    }
    System.out.format("Size %d(%d)   \tunit %s  token %s   %s\n", t.unitCount(), tokenCount, mms(unitStat),
            mms(tokenStat), t.strategy);
    System.out.format("Worst intermediate unit\t%s  token %s\n", su, st);
}

From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java

/**
 * Builds summary statistics of the replicated ownership per token for every endpoint
 * that the strategy adapter reports as being in the allocation ring.
 *
 * @param tokenMetadata token metadata used to evaluate ownership and to look up each
 *                      endpoint's tokens
 * @param rs            replication strategy used for the ownership evaluation
 * @param endpoint      endpoint used to obtain the strategy adapter
 * @return statistics over {@code ownership / tokenCount} of the accepted endpoints
 */
public static SummaryStatistics replicatedOwnershipStats(TokenMetadata tokenMetadata,
        AbstractReplicationStrategy rs, InetAddress endpoint) {
    final SummaryStatistics result = new SummaryStatistics();
    final StrategyAdapter adapter = getStrategy(tokenMetadata, rs, endpoint);
    for (Map.Entry<InetAddress, Double> entry : evaluateReplicatedOwnership(tokenMetadata, rs).entrySet()) {
        final InetAddress node = entry.getKey();
        // Filter only in the same datacentre.
        if (!adapter.inAllocationRing(node)) {
            continue;
        }
        result.addValue(entry.getValue() / tokenMetadata.getTokens(node).size());
    }
    return result;
}

From source file:org.apache.metron.common.dsl.functions.StellarStatistics.java

/**
 * Creates the statistics holder and picks the backing implementation from the window size:
 * a {@code DescriptiveStatistics} bounded to {@code windowSize} elements when the size is
 * positive, otherwise a {@code SummaryStatistics}.
 *
 * @param windowSize The number of input data elements to maintain in memory.  If
 *                   windowSize == 0, then no data elements will be maintained in
 *                   memory.  Negative values take the same branch as 0.
 */
public StellarStatistics(int windowSize) {

    // only one of the underlying implementation classes will be used at a time
    if (windowSize > 0) {
        descStats = new DescriptiveStatistics(windowSize);
    } else {
        summStats = new SummaryStatistics();
    }
}

From source file:org.apache.metron.common.math.stats.OnlineStatisticsProviderTest.java

/**
 * Feeds the same values to reference statistics, to a single online provider, and to ten
 * online providers in rotation, then validates both the single provider and the merge of
 * the ten against the reference statistics.
 *
 * @param values the sample to distribute and validate
 */
private void validateEquality(Iterable<Double> values) {
    final DescriptiveStatistics descriptiveReference = new DescriptiveStatistics();
    final SummaryStatistics summaryReference = new SummaryStatistics();
    final OnlineStatisticsProvider wholeDataProvider = new OnlineStatisticsProvider();

    //Test that the aggregated provider gives the same results as the provider that is shown all the data.
    final List<OnlineStatisticsProvider> shards = new ArrayList<>();
    while (shards.size() < 10) {
        shards.add(new OnlineStatisticsProvider());
    }

    int seen = 0;
    for (double value : values) {
        // The counter is bumped before use, so the first value lands in shard 1, not shard 0.
        seen++;
        descriptiveReference.addValue(value);
        summaryReference.addValue(value);
        shards.get(seen % shards.size()).addValue(value);
        wholeDataProvider.addValue(value);
    }

    // Fold every shard into the first one, in index order.
    StatisticsProvider merged = shards.get(0);
    for (OnlineStatisticsProvider shard : shards.subList(1, shards.size())) {
        merged = merged.merge(shard);
    }

    validateStatisticsProvider(wholeDataProvider, summaryReference, descriptiveReference);
    validateStatisticsProvider(merged, summaryReference, descriptiveReference);
}

From source file:org.apache.metron.common.stellar.StellarStatisticsFunctionsTest.java

/**
 * Resets the variable map and rebuilds the test input together with the two reference
 * statistics objects populated from that input.
 */
@Before
public void setup() {
    variables = new HashMap<>();

    // test input data
    values = Arrays.asList(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0);

    // the DescriptiveStatistics is used for validation
    stats = new DescriptiveStatistics(1000);
    for (Double value : values) {
        stats.addValue(value);
    }

    // the StatisticalSummary is used for validation
    summaryStats = new SummaryStatistics();
    for (Double value : values) {
        summaryStats.addValue(value);
    }
}

From source file:org.apache.metron.statistics.StellarStatisticsFunctionsTest.java

@Test
public void testMergeProviders() throws Exception {
    List<StatisticsProvider> providers = new ArrayList<>();
    /*/*from   www.j a v  a 2 s  .  c  o m*/
    Create 10 providers, each with a sample drawn from a gaussian distribution.
    Update the reference stats from commons math to ensure we are
     */
    GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(1L));
    SummaryStatistics sStatistics = new SummaryStatistics();
    DescriptiveStatistics dStatistics = new DescriptiveStatistics();
    for (int i = 0; i < 10; ++i) {
        List<Double> sample = new ArrayList<>();
        for (int j = 0; j < 100; ++j) {
            double s = gaussian.nextNormalizedDouble();
            sample.add(s);
            sStatistics.addValue(s);
            dStatistics.addValue(s);
        }
        StatisticsProvider provider = (StatisticsProvider) run(
                "STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(sample) + ")", new HashMap<>());
        providers.add(provider);
    }

    /*
    Merge the providers and validate
     */
    Map<String, Object> providerVariables = new HashMap<>();
    for (int i = 0; i < providers.size(); ++i) {
        providerVariables.put("provider_" + i, providers.get(i));
    }
    StatisticsProvider mergedProvider = (StatisticsProvider) run(
            "STATS_MERGE([" + Joiner.on(",").join(providerVariables.keySet()) + "])", providerVariables);
    OnlineStatisticsProviderTest.validateStatisticsProvider(mergedProvider, sStatistics, dStatistics);

}

From source file:org.apache.solr.cloud.autoscaling.sim.TestLargeCluster.java

/**
 * Runs the node-lost scenario five times for every (wait, delay) combination taken from
 * {@code renard5x}, on a freshly set-up cluster each time, and logs one tab-separated
 * result line per combination.
 *
 * <p>Each line holds the mean STARTED/IGNORED event counts for the
 * {@code node_lost_trigger} and for all other triggers combined, followed by
 * min/max/mean/stdev/variance of the value returned by {@code doTestNodeLost}.
 *
 * @throws Exception if cluster setup or the scenario itself fails
 */
public void benchmarkNodeLost() throws Exception {
    final List<String> lines = new ArrayList<>();
    for (int wait : renard5x) {
        for (int delay : renard5x) {
            final SummaryStatistics totalTime = new SummaryStatistics();
            final SummaryStatistics ignoredOurEvents = new SummaryStatistics();
            final SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
            final SummaryStatistics startedOurEvents = new SummaryStatistics();
            final SummaryStatistics startedOtherEvents = new SummaryStatistics();

            for (int run = 0; run < 5; run++) {
                // Start every run from a clean cluster.
                if (cluster != null) {
                    cluster.close();
                }
                setupCluster();
                setUp();
                setupTest();

                final long elapsed = doTestNodeLost(wait, delay * 1000, 0);
                totalTime.addValue(elapsed);

                // get event counts
                final Map<String, Map<String, AtomicInteger>> countsByTrigger = cluster.simGetEventCounts();
                // remove() takes our trigger out, so the loop below only sees the other triggers
                final Map<String, AtomicInteger> ours = countsByTrigger.remove("node_lost_trigger");
                startedOurEvents.addValue(ours.getOrDefault("STARTED", ZERO).get());
                ignoredOurEvents.addValue(ours.getOrDefault("IGNORED", ZERO).get());

                int otherStarted = 0;
                int otherIgnored = 0;
                for (Map<String, AtomicInteger> others : countsByTrigger.values()) {
                    otherStarted += others.getOrDefault("STARTED", ZERO).get();
                    otherIgnored += others.getOrDefault("IGNORED", ZERO).get();
                }
                startedOtherEvents.addValue(otherStarted);
                ignoredOtherEvents.addValue(otherIgnored);
            }

            final String line = String.format(Locale.ROOT,
                    "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f", wait, delay,
                    startedOurEvents.getMean(), ignoredOurEvents.getMean(), startedOtherEvents.getMean(),
                    ignoredOtherEvents.getMean(), totalTime.getMin(), totalTime.getMax(), totalTime.getMean(),
                    totalTime.getStandardDeviation(), totalTime.getVariance());
            lines.add(line);
        }
    }

    log.info("===== RESULTS ======");
    log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
    for (String line : lines) {
        log.info(line);
    }
}

From source file:org.apache.tika.eval.tokens.LuceneTokenCounter.java

/**
 * Computes token statistics for one field of the index reader and stores them in
 * {@code fieldStats} under the field name.
 *
 * <p>For every term the method records the term's code-point length once per occurrence,
 * accumulates {@code p * log2(p)} with {@code p = termFreq / totalTokens}, and offers the
 * term to a priority queue sized by {@code topN}.
 *
 * @param field name of the field to analyse
 * @throws IOException              if reading from the index fails
 * @throws IllegalArgumentException if the total token count or a single term frequency
 *                                  exceeds {@code Integer.MAX_VALUE}
 */
void count(String field) throws IOException {
    final long tokenCount = leafReader.getSumTotalTermFreq(field);
    if (tokenCount > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("can't handle longs");
    }
    final int tokenCountInt = (int) tokenCount;
    final double base = 2.0;
    final SummaryStatistics summStats = new SummaryStatistics();
    int uniqueTokenCount = 0;
    double ent = 0.0d;

    final Terms terms = leafReader.terms(field);
    if (terms == null) {
        //if there were no terms
        fieldStats.put(field,
                new TokenStatistics(uniqueTokenCount, tokenCountInt, new TokenIntPair[0], ent, summStats));
        return;
    }

    final TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    final TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN);

    for (; bytesRef != null; bytesRef = termsEnum.next()) {
        final long termFreq = termsEnum.totalTermFreq();
        if (termFreq > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Sorry can't handle longs yet");
        }
        final int tf = (int) termFreq;

        //TODO: figure out how to avoid Stringifying this
        //to get codepoint count
        final String term = bytesRef.utf8ToString();
        final int codePoints = term.codePointCount(0, term.length());
        // one observation per occurrence, so the length statistics are frequency-weighted
        for (int occurrence = 0; occurrence < tf; occurrence++) {
            summStats.addValue(codePoints);
        }

        final double p = (double) tf / (double) tokenCount;
        ent += p * FastMath.log(base, p);

        final boolean offer = queue.top() == null || queue.size() < topN || tf >= queue.top().getValue();
        if (offer) {
            queue.insertWithOverflow(new TokenIntPair(term, tf));
        }

        uniqueTokenCount++;
    }

    if (tokenCountInt > 0) {
        ent = (-1.0d / (double) tokenCountInt) * ent;
    }

    fieldStats.put(field,
            new TokenStatistics(uniqueTokenCount, tokenCountInt, queue.getArray(), ent, summStats));
}

From source file:org.apache.tika.eval.tokens.TokenCounter.java

/**
 * Tokenizes {@code content} with the given analyzer, adds the token counts to the
 * per-field token map, and stores refreshed statistics for the field in
 * {@code tokenStatistics}.
 *
 * <p>The statistics are computed over the whole per-field map (which may already hold
 * counts from earlier calls), while the total token count passed on is the number of
 * tokens seen in this call.
 *
 * @param field    field name; key for both the token map and the stored statistics
 * @param analyzer analyzer that produces the token stream
 * @param content  text to tokenize
 * @throws IOException if the token stream fails
 */
private void _add(String field, Analyzer analyzer, String content) throws IOException {
    int totalTokens = 0;
    Map<String, MutableInt> tokenMap;

    // try-with-resources closes the stream even when tokenization throws; end() is called
    // after the last incrementToken() and before close(), as the TokenStream workflow requires.
    try (TokenStream ts = analyzer.tokenStream(field, content)) {
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        ts.reset();
        tokenMap = map.get(field);
        if (tokenMap == null) {
            tokenMap = new HashMap<>();
            map.put(field, tokenMap);
        }
        while (ts.incrementToken()) {
            String token = termAtt.toString();
            MutableInt cnt = tokenMap.get(token);
            if (cnt == null) {
                cnt = new MutableInt(1);
                tokenMap.put(token, cnt);
            } else {
                cnt.increment();
            }
            totalTokens++;
        }
        ts.end();
    }

    int totalUniqueTokens = tokenMap.size();

    double ent = 0.0d;
    double p = 0.0d;
    double base = 2.0;

    TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN);

    SummaryStatistics summaryStatistics = new SummaryStatistics();
    for (Map.Entry<String, MutableInt> e : tokenMap.entrySet()) {
        String token = e.getKey();
        int termFreq = e.getValue().intValue();

        p = (double) termFreq / (double) totalTokens;
        ent += p * FastMath.log(base, p);
        int len = token.codePointCount(0, token.length());
        // one observation per occurrence, so the length statistics are frequency-weighted
        for (int i = 0; i < termFreq; i++) {
            summaryStatistics.addValue(len);
        }
        if (queue.top() == null || queue.size() < topN || termFreq >= queue.top().getValue()) {
            queue.insertWithOverflow(new TokenIntPair(token, termFreq));
        }

    }
    if (totalTokens > 0) {
        ent = (-1.0d / (double) totalTokens) * ent;
    }

    tokenStatistics.put(field,
            new TokenStatistics(totalUniqueTokens, totalTokens, queue.getArray(), ent, summaryStatistics));

}

From source file:org.bml.util.rt.telemetry.track.AtomicIntegerTrack.java

/**
 * Creates the statistics object that matches the requested track type and always
 * creates a new {@code Frequency} instance.
 *
 * @param trackType selects which statistics field is initialised
 */
private void initTrack(TRACK_TYPE trackType) {
    switch (trackType) {
    case SUMMARY:
        sStats = new SummaryStatistics();
        break;
    case DESCRIPTIVE:
        dStats = new SynchronizedDescriptiveStatistics();
        break;
    default:
        // any other track type gets no statistics object here
        break;
    }
    frequency = new Frequency();
}