Example usage for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()

Introduction

On this page you can find example usages of the no-argument constructor of org.apache.commons.math3.stat.descriptive.SummaryStatistics.

Prototype

public SummaryStatistics()

Source Link

Document

Constructs a SummaryStatistics instance.

Usage

From source file:org.apache.cassandra.dht.tokenallocator.ReplicationAwareTokenAllocatorTest.java

/**
 * Computes normalised ownership statistics for the allocator's units and tokens,
 * folds them into the two running summaries and optionally prints them.
 *
 * @param t     allocator whose {@code sortedTokens}, {@code unitToTokens} and
 *              {@code strategy} are inspected
 * @param su    summary updated with the per-unit statistics
 * @param st    summary updated with the per-token statistics
 * @param print when {@code true}, the current statistics and both summaries are
 *              written to standard output
 */
private void updateSummary(ReplicationAwareTokenAllocator<Unit> t, Summary su, Summary st, boolean print) {
    final int size = t.sortedTokens.size();
    // Scale factor applied to every ownership value: token count divided by replica count.
    final double inverseAverage = 1.0 * size / t.strategy.replicas();

    // Per-unit statistics: each unit's ownership, scaled, divided by its own token count.
    final Map<Unit, Double> ownership = evaluateReplicatedOwnership(t);
    final SummaryStatistics unitStat = new SummaryStatistics();
    for (Map.Entry<Unit, Double> entry : ownership.entrySet()) {
        final int unitTokenCount = t.unitToTokens.get(entry.getKey()).size();
        unitStat.addValue(entry.getValue() * inverseAverage / unitTokenCount);
    }
    su.update(unitStat);

    // Per-token statistics: replicated ownership of every token, scaled the same way.
    final SummaryStatistics tokenStat = new SummaryStatistics();
    for (Token token : t.sortedTokens.keySet()) {
        tokenStat.addValue(replicatedTokenOwnership(token, t.sortedTokens, t.strategy) * inverseAverage);
    }
    st.update(tokenStat);

    if (!print) {
        return;
    }
    System.out.format("Size %d(%d)   \tunit %s  token %s   %s\n", t.unitCount(), size, mms(unitStat),
            mms(tokenStat), t.strategy);
    System.out.format("Worst intermediate unit\t%s  token %s\n", su, st);
}

From source file:org.apache.cassandra.dht.tokenallocator.TokenAllocation.java

/**
 * Collects per-token replicated ownership statistics for the endpoints that the
 * allocation strategy reports as being in the allocation ring.
 *
 * @param tokenMetadata token metadata used to evaluate ownership and token counts
 * @param rs            replication strategy used to evaluate ownership
 * @param endpoint      endpoint used to resolve the strategy adapter
 * @return statistics over {@code ownership / tokenCount} for every accepted endpoint
 */
public static SummaryStatistics replicatedOwnershipStats(TokenMetadata tokenMetadata,
        AbstractReplicationStrategy rs, InetAddress endpoint) {
    final SummaryStatistics result = new SummaryStatistics();
    final StrategyAdapter adapter = getStrategy(tokenMetadata, rs, endpoint);
    for (Map.Entry<InetAddress, Double> entry : evaluateReplicatedOwnership(tokenMetadata, rs).entrySet()) {
        final InetAddress owner = entry.getKey();
        // Filter only in the same datacentre.
        if (!adapter.inAllocationRing(owner)) {
            continue;
        }
        final int ownerTokenCount = tokenMetadata.getTokens(owner).size();
        result.addValue(entry.getValue() / ownerTokenCount);
    }
    return result;
}

From source file:org.apache.metron.common.dsl.functions.StellarStatistics.java

/**
 * Creates a statistics holder backed by exactly one of the two underlying
 * implementations, chosen from the requested window size.
 *
 * @param windowSize The number of input data elements to maintain in memory.  If
 *                   windowSize is positive, a windowed {@code DescriptiveStatistics}
 *                   of that size is used.  Otherwise (zero or negative) a
 *                   {@code SummaryStatistics} is used and no data elements are
 *                   maintained in memory.
 */
public StellarStatistics(int windowSize) {

    // only one of the underlying implementation classes will be used at a time
    if (windowSize > 0) {
        descStats = new DescriptiveStatistics(windowSize);
    } else {
        summStats = new SummaryStatistics();
    }
}

From source file:org.apache.metron.common.math.stats.OnlineStatisticsProviderTest.java

/**
 * Feeds the same values to the Commons Math reference statistics, to a single
 * online provider, and (spread over ten providers) to a merged provider, then
 * validates both providers against the reference statistics.
 *
 * @param values the sample to push through every statistics implementation
 */
private void validateEquality(Iterable<Double> values) {
    final int shardCount = 10;

    DescriptiveStatistics referenceDescriptive = new DescriptiveStatistics();
    SummaryStatistics referenceSummary = new SummaryStatistics();
    OnlineStatisticsProvider singleProvider = new OnlineStatisticsProvider();

    // Test that the aggregated provider gives the same results as the provider that is shown all the data.
    List<OnlineStatisticsProvider> shards = new ArrayList<>();
    for (int n = 0; n < shardCount; ++n) {
        shards.add(new OnlineStatisticsProvider());
    }

    int seen = 0;
    for (double value : values) {
        // The counter is advanced before it is used, so the first value lands in shard 1.
        seen++;
        referenceDescriptive.addValue(value);
        referenceSummary.addValue(value);
        shards.get(seen % shards.size()).addValue(value);
        singleProvider.addValue(value);
    }

    // Fold every remaining shard into the first one.
    StatisticsProvider merged = shards.get(0);
    for (OnlineStatisticsProvider shard : shards.subList(1, shards.size())) {
        merged = merged.merge(shard);
    }

    validateStatisticsProvider(singleProvider, referenceSummary, referenceDescriptive);
    validateStatisticsProvider(merged, referenceSummary, referenceDescriptive);
}

From source file:org.apache.metron.common.stellar.StellarStatisticsFunctionsTest.java

/**
 * Resets the variable map and rebuilds the input values together with the two
 * Commons Math reference statistics used for validation.
 */
@Before
public void setup() {
    variables = new HashMap<>();

    // test input data
    values = Arrays.asList(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0);

    // the DescriptiveStatistics is used for validation
    stats = new DescriptiveStatistics(1000);
    for (Double value : values) {
        stats.addValue(value);
    }

    // the SummaryStatistics is used for validation
    summaryStats = new SummaryStatistics();
    for (Double value : values) {
        summaryStats.addValue(value);
    }
}

From source file:org.apache.metron.statistics.StellarStatisticsFunctionsTest.java

@Test
public void testMergeProviders() throws Exception {
    List<StatisticsProvider> providers = new ArrayList<>();
    /*/*from   www.j a v  a 2 s  .  c  o m*/
    Create 10 providers, each with a sample drawn from a gaussian distribution.
    Update the reference stats from commons math to ensure we are
     */
    GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(1L));
    SummaryStatistics sStatistics = new SummaryStatistics();
    DescriptiveStatistics dStatistics = new DescriptiveStatistics();
    for (int i = 0; i < 10; ++i) {
        List<Double> sample = new ArrayList<>();
        for (int j = 0; j < 100; ++j) {
            double s = gaussian.nextNormalizedDouble();
            sample.add(s);
            sStatistics.addValue(s);
            dStatistics.addValue(s);
        }
        StatisticsProvider provider = (StatisticsProvider) run(
                "STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(sample) + ")", new HashMap<>());
        providers.add(provider);
    }

    /*
    Merge the providers and validate
     */
    Map<String, Object> providerVariables = new HashMap<>();
    for (int i = 0; i < providers.size(); ++i) {
        providerVariables.put("provider_" + i, providers.get(i));
    }
    StatisticsProvider mergedProvider = (StatisticsProvider) run(
            "STATS_MERGE([" + Joiner.on(",").join(providerVariables.keySet()) + "])", providerVariables);
    OnlineStatisticsProviderTest.validateStatisticsProvider(mergedProvider, sStatistics, dStatistics);

}

From source file:org.apache.solr.cloud.autoscaling.sim.TestLargeCluster.java

/**
 * Runs the node-lost scenario five times for every (wait, delay) pair taken from
 * {@code renard5x}, gathers timing and event-count statistics per pair, and logs
 * one tab-separated result row per pair at the end.
 *
 * @throws Exception if cluster setup or the scenario itself fails
 */
public void benchmarkNodeLost() throws Exception {
    List<String> rows = new ArrayList<>();
    for (int wait : renard5x) {
        for (int delay : renard5x) {
            SummaryStatistics totalTime = new SummaryStatistics();
            SummaryStatistics ignoredOurEvents = new SummaryStatistics();
            SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
            SummaryStatistics startedOurEvents = new SummaryStatistics();
            SummaryStatistics startedOtherEvents = new SummaryStatistics();

            for (int run = 0; run < 5; run++) {
                // Start every run from a freshly built cluster.
                if (cluster != null) {
                    cluster.close();
                }
                setupCluster();
                setUp();
                setupTest();

                long elapsed = doTestNodeLost(wait, delay * 1000, 0);
                totalTime.addValue(elapsed);

                // get event counts; the node-lost trigger is removed so that the
                // remaining entries are exactly the "other" triggers
                Map<String, Map<String, AtomicInteger>> countsByTrigger = cluster.simGetEventCounts();
                Map<String, AtomicInteger> ourCounts = countsByTrigger.remove("node_lost_trigger");
                startedOurEvents.addValue(ourCounts.getOrDefault("STARTED", ZERO).get());
                ignoredOurEvents.addValue(ourCounts.getOrDefault("IGNORED", ZERO).get());

                int startedElsewhere = 0;
                int ignoredElsewhere = 0;
                for (Map<String, AtomicInteger> otherCounts : countsByTrigger.values()) {
                    startedElsewhere += otherCounts.getOrDefault("STARTED", ZERO).get();
                    ignoredElsewhere += otherCounts.getOrDefault("IGNORED", ZERO).get();
                }
                startedOtherEvents.addValue(startedElsewhere);
                ignoredOtherEvents.addValue(ignoredElsewhere);
            }

            String row = String.format(Locale.ROOT,
                    "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f", wait, delay,
                    startedOurEvents.getMean(), ignoredOurEvents.getMean(), startedOtherEvents.getMean(),
                    ignoredOtherEvents.getMean(), totalTime.getMin(), totalTime.getMax(), totalTime.getMean(),
                    totalTime.getStandardDeviation(), totalTime.getVariance());
            rows.add(row);
        }
    }

    log.info("===== RESULTS ======");
    log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
    for (String row : rows) {
        log.info(row);
    }
}

From source file:org.apache.tika.eval.tokens.LuceneTokenCounter.java

/**
 * Computes token statistics for one indexed field and stores them in
 * {@code fieldStats}: the number of distinct terms, the total token count, the
 * top-N terms by frequency, an entropy-style figure and the distribution of term
 * lengths measured in code points.
 *
 * @param field name of the field to analyse
 * @throws IOException              if reading from the index fails
 * @throws IllegalArgumentException if the field's total token count or a single
 *                                  term's frequency exceeds {@code Integer.MAX_VALUE}
 */
void count(String field) throws IOException {
    final long tokenCount = leafReader.getSumTotalTermFreq(field);
    if (tokenCount > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("can't handle longs");
    }
    final int tokenCountInt = (int) tokenCount;
    int distinctTerms = 0;
    final SummaryStatistics lengthStats = new SummaryStatistics();
    double entropy = 0.0d;
    final double logBase = 2.0;

    final Terms terms = leafReader.terms(field);
    if (terms == null) {
        //if there were no terms, record empty statistics for the field
        fieldStats.put(field,
                new TokenStatistics(distinctTerms, tokenCountInt, new TokenIntPair[0], entropy, lengthStats));
        return;
    }

    final TermsEnum termsEnum = terms.iterator();
    final BytesRef firstTerm = termsEnum.next();
    final TokenCountPriorityQueue topTerms = new TokenCountPriorityQueue(topN);

    for (BytesRef term = firstTerm; term != null; term = termsEnum.next()) {
        final long rawFreq = termsEnum.totalTermFreq();
        if (rawFreq > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Sorry can't handle longs yet");
        }
        final int freq = (int) rawFreq;

        //TODO: figure out how to avoid Stringifying this
        //to get codepoint count
        final String text = term.utf8ToString();
        final int codePoints = text.codePointCount(0, text.length());
        // One observation per occurrence, so the length statistics are frequency-weighted.
        for (int remaining = freq; remaining > 0; remaining--) {
            lengthStats.addValue(codePoints);
        }

        final double probability = (double) freq / (double) tokenCount;
        entropy += probability * FastMath.log(logBase, probability);

        if (topTerms.top() == null || topTerms.size() < topN || freq >= topTerms.top().getValue()) {
            topTerms.insertWithOverflow(new TokenIntPair(text, freq));
        }

        distinctTerms++;
    }

    // The accumulated sum of p * log2(p) is negated and scaled by the token count.
    if (tokenCountInt > 0) {
        entropy = (-1.0d / (double) tokenCountInt) * entropy;
    }

    fieldStats.put(field,
            new TokenStatistics(distinctTerms, tokenCountInt, topTerms.getArray(), entropy, lengthStats));
}

From source file:org.apache.tika.eval.tokens.TokenCounter.java

/**
 * Tokenizes {@code content} with {@code analyzer}, adds the resulting token counts
 * to the per-field token map, and stores freshly computed statistics for the field
 * in {@code tokenStatistics}.
 *
 * <p>The token stream is consumed following the Lucene workflow
 * (reset, incrementToken, end, close); {@code close()} is guaranteed by
 * try-with-resources even when tokenization fails.
 *
 * @param field    name of the field the content belongs to
 * @param analyzer analyzer used to create the token stream
 * @param content  text to tokenize
 * @throws IOException if the token stream fails
 */
private void _add(String field, Analyzer analyzer, String content) throws IOException {
    int totalTokens = 0;
    Map<String, MutableInt> tokenMap;

    try (TokenStream ts = analyzer.tokenStream(field, content)) {
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        ts.reset();

        // Reuse the field's existing map if there is one, so counts accumulate across calls.
        tokenMap = map.get(field);
        if (tokenMap == null) {
            tokenMap = new HashMap<>();
            map.put(field, tokenMap);
        }

        while (ts.incrementToken()) {
            String token = termAtt.toString();
            MutableInt cnt = tokenMap.get(token);
            if (cnt == null) {
                cnt = new MutableInt(1);
                tokenMap.put(token, cnt);
            } else {
                cnt.increment();
            }
            totalTokens++;
        }
        // end() must be called after the last incrementToken() and before close().
        ts.end();
    }

    int totalUniqueTokens = tokenMap.size();

    double ent = 0.0d;
    double base = 2.0;

    TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN);

    // NOTE(review): tokenMap may hold counts from earlier calls for the same field,
    // while totalTokens only counts this call's tokens - confirm that mixing the two
    // in the probability below is intended.
    SummaryStatistics summaryStatistics = new SummaryStatistics();
    for (Map.Entry<String, MutableInt> e : tokenMap.entrySet()) {
        String token = e.getKey();
        int termFreq = e.getValue().intValue();

        double p = (double) termFreq / (double) totalTokens;
        ent += p * FastMath.log(base, p);

        // One observation per occurrence, so the length statistics are frequency-weighted.
        int len = token.codePointCount(0, token.length());
        for (int i = 0; i < termFreq; i++) {
            summaryStatistics.addValue(len);
        }
        if (queue.top() == null || queue.size() < topN || termFreq >= queue.top().getValue()) {
            queue.insertWithOverflow(new TokenIntPair(token, termFreq));
        }
    }
    // The accumulated sum of p * log2(p) is negated and scaled by the token count.
    if (totalTokens > 0) {
        ent = (-1.0d / (double) totalTokens) * ent;
    }

    tokenStatistics.put(field,
            new TokenStatistics(totalUniqueTokens, totalTokens, queue.getArray(), ent, summaryStatistics));
}

From source file:org.bml.util.rt.telemetry.track.AtomicIntegerTrack.java

/**
 * Creates the statistics backing object that matches the requested track type
 * and then creates a fresh {@code Frequency} instance.
 *
 * @param trackType the kind of statistics to allocate; any value other than
 *                  {@code SUMMARY} or {@code DESCRIPTIVE} allocates no statistics
 *                  object
 */
private void initTrack(TRACK_TYPE trackType) {
    switch (trackType) {
    case SUMMARY:
        sStats = new SummaryStatistics();
        break;
    case DESCRIPTIVE:
        dStats = new SynchronizedDescriptiveStatistics();
        break;
    default:
        // No statistics object for other track types.
        break;
    }
    // The frequency tracker is created for every track type.
    frequency = new Frequency();
}