List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue
public void addValue(double v)
From source file:org.apache.solr.client.solrj.io.eval.ExponentialMovingAverageEvaluator.java
@Override public Object doWork(Object... values) throws IOException { if (!(2 == values.length || values.length == 3)) { throw new IOException( String.format(Locale.ROOT, "%s(...) only works with 2 or 3 values but %d were provided", constructingFactory.getFunctionName(getClass()), values.length)); }/*from ww w. ja v a 2 s. co m*/ List<?> observations = (List<?>) values[0]; Number window = (Number) values[1]; Number alpha; if (2 == values.length) { if (!(observations instanceof List<?>)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a List", toExpression(constructingFactory), values[0].getClass().getSimpleName())); } if (!(observations.size() > 1)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found list size of %s for the first value, expecting a List of size > 0.", toExpression(constructingFactory), observations.size())); } if (!(window instanceof Number)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), values[1].getClass().getSimpleName())); } if (window.doubleValue() > observations.size()) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found a window size of %s for the second value, the first value has a List size of %s, expecting a window value smaller or equal to the List size", toExpression(constructingFactory), window.intValue(), observations.size())); } } if (3 == values.length) { alpha = (Number) values[2]; if (!(alpha instanceof Number)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the third value, expecting a Number", toExpression(constructingFactory), values[2].getClass().getSimpleName())); } if (!(alpha.doubleValue() >= 0 && alpha.doubleValue() <= 1.0)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - out of range, found %s for 
the third value, expecting a range between 0 and 1.0", toExpression(constructingFactory), alpha.doubleValue())); } } else { alpha = 2.0 / (window.doubleValue() + 1.0); } List<Number> sequence = new ArrayList<>(); DescriptiveStatistics slider = new DescriptiveStatistics(window.intValue()); Number lastValue = 0; for (Object value : observations) { slider.addValue(((Number) value).doubleValue()); if (slider.getN() == window.intValue()) { lastValue = slider.getMean(); break; } } sequence.add(lastValue); int i = 0; for (Object value : observations) { if (i >= window.intValue()) { Number val = (alpha.doubleValue() * (((Number) value).doubleValue() - lastValue.doubleValue()) + lastValue.doubleValue()); sequence.add(val); lastValue = val; } ++i; } return sequence; }
From source file:org.apache.solr.client.solrj.io.eval.MovingAverageEvaluator.java
/**
 * Computes a simple moving average over a list of numeric values.
 *
 * <p>The values are fed one by one into a {@code DescriptiveStatistics} with the given window
 * size. Once at least {@code window} values have been added, the current mean is appended to
 * the result after every further value.
 *
 * @param first a {@code List} whose elements are {@code Number}s
 * @param second a {@code Number} giving the window size (its {@code intValue()} is used)
 * @return a {@code List<Number>} of moving means
 * @throws IOException if either argument is {@code null} or has the wrong type
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
  if (null == first) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - null found for the first value",
        toExpression(constructingFactory)));
  }
  if (null == second) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - null found for the second value",
        toExpression(constructingFactory)));
  }
  if (!(first instanceof List<?>)) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - found type %s for the first value, expecting a List",
        toExpression(constructingFactory), first.getClass().getSimpleName()));
  }
  if (!(second instanceof Number)) {
    // Report the type of the offending (second) argument; the original reported `first` here.
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - found type %s for the second value, expecting a Number",
        toExpression(constructingFactory), second.getClass().getSimpleName()));
  }

  List<?> values = (List<?>) first;
  int window = ((Number) second).intValue();

  List<Number> moving = new ArrayList<>();
  DescriptiveStatistics slider = new DescriptiveStatistics(window);
  for (Object value : values) {
    slider.addValue(((Number) value).doubleValue());
    // Emit a mean only once the window has been filled.
    if (slider.getN() >= window) {
      moving.add(slider.getMean());
    }
  }
  return moving;
}
From source file:org.apache.solr.client.solrj.io.eval.MovingMedianEvaluator.java
/**
 * Computes a moving median over a list of numeric values.
 *
 * <p>The values are fed one by one into a {@code DescriptiveStatistics} with the given window
 * size. Once at least {@code window} values have been added, the 50th percentile of the values
 * currently held by the window is appended to the result after every further value.
 *
 * @param first a {@code List} whose elements are {@code Number}s
 * @param second a {@code Number} giving the window size (its {@code intValue()} is used)
 * @return a {@code List<Number>} of moving medians
 * @throws IOException if either argument is {@code null} or has the wrong type
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
  if (null == first) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - null found for the first value",
        toExpression(constructingFactory)));
  }
  if (null == second) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - null found for the second value",
        toExpression(constructingFactory)));
  }
  if (!(first instanceof List<?>)) {
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - found type %s for the first value, expecting a List",
        toExpression(constructingFactory), first.getClass().getSimpleName()));
  }
  if (!(second instanceof Number)) {
    // Report the type of the offending (second) argument; the original reported `first` here.
    throw new IOException(String.format(Locale.ROOT,
        "Invalid expression %s - found type %s for the second value, expecting a Number",
        toExpression(constructingFactory), second.getClass().getSimpleName()));
  }

  List<?> values = (List<?>) first;
  int window = ((Number) second).intValue();

  List<Number> moving = new ArrayList<>();
  DescriptiveStatistics slider = new DescriptiveStatistics(window);
  Percentile percentile = new Percentile();
  for (Object value : values) {
    slider.addValue(((Number) value).doubleValue());
    // Emit a median only once the window has been filled.
    if (slider.getN() >= window) {
      double median = percentile.evaluate(slider.getValues(), 50);
      moving.add(median);
    }
  }
  return moving;
}
From source file:org.asoem.greyfish.impl.environment.AgentObjectPoolAT.java
@Test public void testSignificantPerformanceBenefit() throws Exception { // given//www .j a v a2 s . com final int runs = 1000; final DescriptiveStatistics statisticsWithoutObjectPool = new DescriptiveStatistics(); final DescriptiveStatistics statisticsWithObjectPool = new DescriptiveStatistics(); final Supplier<BasicAgent> agentFactory = new Supplier<BasicAgent>() { @Override public BasicAgent get() { return DefaultBasicAgent.builder() .addAllActions( GenericAction.<BasicAgent>builder().name("reproduce") .executedIf(AlwaysTrueCondition.<BasicAgent>builder().build()) .executes(Callbacks.emptyCallback()).build(), GenericAction.<BasicAgent>builder().name("die") .executedIf(AlwaysTrueCondition.<BasicAgent>builder().build()) .executes(Callbacks.emptyCallback()).build()) .build(); } }; // when final int objects = 1000; for (int i = 0; i < runs; i++) { // randomize execution order if (RandomGenerators.rng().nextBoolean()) { statisticsWithoutObjectPool.addValue(measureAgentCreation(objects, agentFactory)); statisticsWithObjectPool.addValue(measureAgentRecycling(objects, agentFactory)); } else { statisticsWithObjectPool.addValue(measureAgentRecycling(objects, agentFactory)); statisticsWithoutObjectPool.addValue(measureAgentCreation(objects, agentFactory)); } } // then logger.info("Simulation with object pool vs. without object pool: {}, {}", statisticsWithObjectPool, statisticsWithoutObjectPool); // Is it faster? assertThat( "The mean elapsed time of the version with an object pool " + "is not less than the mean elapsed time of the version with an object pool", statisticsWithObjectPool.getMean(), is(lessThan(statisticsWithoutObjectPool.getMean()))); // Is it also significantly faster? Make a t-test. 
// Test assumptions for t-test: normality assertThat("Is not normal distributed", StatisticalTests.shapiroWilk(statisticsWithObjectPool.getValues()).p(), is(lessThan(SIGNIFICANT.getAlpha()))); assertThat("Is not normal distributed", StatisticalTests.shapiroWilk(statisticsWithoutObjectPool.getValues()).p(), is(lessThan(SIGNIFICANT.getAlpha()))); // Perform the t-test final double t = new TTest().t(statisticsWithObjectPool, statisticsWithoutObjectPool); final double p = new TTest().tTest(statisticsWithObjectPool, statisticsWithoutObjectPool); logger.info("t-test: t={}, p={}", t, p); double qt = new TDistribution(statisticsWithObjectPool.getN() - 1 + statisticsWithoutObjectPool.getN() - 1) .inverseCumulativeProbability(1 - SIGNIFICANT.getAlpha() / 2); assertThat("The means are not significantly different", Math.abs(t), is(greaterThan(qt))); }
From source file:org.asoem.greyfish.impl.environment.AgentObjectPoolAT.java
@Test public void testSignificantPerformanceBenefitInSimulationContext() throws Exception { // given/*from w ww . ja v a 2 s. c o m*/ final int populationSize = 400; final int steps = 30000; final int runs = 20; final DescriptiveStatistics statisticsWithoutObjectPool = new DescriptiveStatistics(); final DescriptiveStatistics statisticsWithObjectPool = new DescriptiveStatistics(); final ExecutorService executorService = MoreExecutors.sameThreadExecutor(); // when for (int i = 0; i < runs; i++) { // randomize execution order if (RandomGenerators.rng().nextBoolean()) { statisticsWithoutObjectPool.addValue(measureExecutionTime( new SimulationWithoutObjectPoolFactory(populationSize, executorService).newSimulation(), steps)); statisticsWithObjectPool.addValue(measureExecutionTime( new SimulationWithObjectPoolFactory(populationSize, executorService).newSimulation(), steps)); } else { statisticsWithObjectPool.addValue(measureExecutionTime( new SimulationWithObjectPoolFactory(populationSize, executorService).newSimulation(), steps)); statisticsWithoutObjectPool.addValue(measureExecutionTime( new SimulationWithoutObjectPoolFactory(populationSize, executorService).newSimulation(), steps)); } } // then logger.info("Simulation with object pool vs. without object pool: {}, {}", statisticsWithObjectPool, statisticsWithoutObjectPool); assertThat( "The mean elapsed time of the version with an object pool " + "is not less than the mean elapsed time of the version with an object pool", statisticsWithObjectPool.getMean(), is(lessThan(statisticsWithoutObjectPool.getMean()))); // Is it also significantly faster? Make a t-test. 
// Test assumptions for t-test: normality assertThat("Is not normal distributed", StatisticalTests.shapiroWilk(statisticsWithObjectPool.getValues()).p(), is(lessThan(0.05))); assertThat("Is not normal distributed", StatisticalTests.shapiroWilk(statisticsWithoutObjectPool.getValues()).p(), is(lessThan(0.05))); final double t = TestUtils.t(statisticsWithObjectPool, statisticsWithoutObjectPool); final double p = TestUtils.tTest(statisticsWithObjectPool, statisticsWithoutObjectPool); logger.info("t-test: t={}, p={}", t, p); double qt = new TDistribution(statisticsWithObjectPool.getN() - 1 + statisticsWithoutObjectPool.getN() - 1) .inverseCumulativeProbability(0.975); assertThat("The means are not significantly different", Math.abs(t), is(greaterThan(qt))); }
From source file:org.asoem.greyfish.utils.collect.UnrolledListAT.java
protected static void testFindFirst(final int runs, final List<String> controlList, final FunctionalList<String> functionalList, final Iterable<Predicate<String>> predicates) { // given/* w w w.j av a 2 s . c om*/ final DescriptiveStatistics statisticsFunctional = new DescriptiveStatistics(); final DescriptiveStatistics statisticsControl = new DescriptiveStatistics(); final Runnable runnable1 = new Runnable() { @Override public void run() { statisticsControl.addValue(measureFindFirstIterative(controlList, predicates)); } }; final Runnable runnable2 = new Runnable() { @Override public void run() { statisticsFunctional.addValue(measureFindFirstUnrolled(functionalList, predicates)); } }; // burn in phase executeRandomized(1000, RandomGenerators.rng(), new Runnable() { @Override public void run() { measureFindFirstIterative(controlList, predicates); } }, new Runnable() { @Override public void run() { measureFindFirstUnrolled(functionalList, predicates); } }); // when executeRandomized(runs, RandomGenerators.rng(), runnable1, runnable2); // then Statistics.assertSignificantDecrease(statisticsControl, statisticsFunctional, SIGNIFICANT.getAlpha()); }
From source file:org.asoem.greyfish.utils.math.ThreadLocalRandomGeneratorAT.java
private static Tuple2<DescriptiveStatistics, DescriptiveStatistics> measure(final int nThreads, final Supplier<RandomGenerator> generatorSupplier, final int nMeasurements, final TaskFactory taskFactory) throws InterruptedException, ExecutionException { // given/*from ww w . j a va2 s . c om*/ final RandomGenerator synchronizedRandomGenerator = RandomGenerators .synchronizedGenerator(generatorSupplier.get()); final RandomGenerator threadLocalRandomGenerator = RandomGenerators.threadLocalGenerator(generatorSupplier); // when final ExecutorService executorService = Executors.newFixedThreadPool(nThreads); final List<Future<Long>> futuresSynchronized = Lists.newArrayList(); for (int i = 0; i < nMeasurements; i++) { final Callable<Long> task = taskFactory.createTask(synchronizedRandomGenerator); final Future<Long> future = executorService.submit(task); futuresSynchronized.add(future); } final DescriptiveStatistics statisticsSynchronized = new DescriptiveStatistics(); for (Future<Long> longFuture : futuresSynchronized) { statisticsSynchronized.addValue(longFuture.get()); } final List<Future<Long>> futuresThreadLocal = Lists.newArrayList(); for (int i = 0; i < nMeasurements; i++) { final Callable<Long> task = taskFactory.createTask(threadLocalRandomGenerator); final Future<Long> future = executorService.submit(task); futuresThreadLocal.add(future); } final DescriptiveStatistics statisticsThreadLocal = new DescriptiveStatistics(); for (Future<Long> longFuture : futuresThreadLocal) { statisticsThreadLocal.addValue(longFuture.get()); } System.out.println("Synchronized stats in us: " + statisticsSynchronized); System.out.println("ThreadLocal stats in us: " + statisticsThreadLocal); return Tuple2.of(statisticsSynchronized, statisticsThreadLocal); }
From source file:org.bml.util.rt.telemetry.SecondBasedCounter.java
/** * There may be a way to do this faster however I chose to use a bound * DescriptiveStatistics object to allow the caller to encapsulate more * operations than just a simple sum./*from w ww .j a v a 2 s . com*/ * * @return DescriptiveStatistics containing the last 60 seconds of telemetry * data. because of the use of atomics this should be as up to date as * possible without getting into manual locking which this method and all * methods in this class are contracted to avoid at all costs. */ public DescriptiveStatistics getLastMinutesTelemetry() { DescriptiveStatistics stats = new DescriptiveStatistics(60); for (int c = 0; c < 60; c++) { stats.addValue(counterArray[c].doubleValue()); } return stats; }
From source file:org.cirdles.calamari.algorithms.TukeyBiweight.java
/** * Calculates arithmetic median of array of doubles. * * @pre values has one element/*w ww. j av a2 s .com*/ * @param values * @return */ public static double calculateMedian(double[] values) { double median; // enforce precondition if (values.length == 0) { median = 0.0; } else { DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the array for (int i = 0; i < values.length; i++) { stats.addValue(values[i]); } median = stats.getPercentile(50); } return median; }
From source file:org.commoncrawl.mapred.ec2.postprocess.deduper.DeduperUtils.java
/** * /*from ww w .j a va 2 s. c o m*/ * @param args */ public static void main(String[] args) throws IOException { URLFPBloomFilter filter = new URLFPBloomFilter(JSONSetBuilder.NUM_ELEMENTS, JSONSetBuilder.NUM_HASH_FUNCTIONS, JSONSetBuilder.NUM_BITS); DescriptiveStatistics filterClearStats = new DescriptiveStatistics(); for (int i = 0; i < 1000; ++i) { long timeStart = System.nanoTime(); filter.clear(); long timeEnd = System.nanoTime(); filterClearStats.addValue(timeEnd - timeStart); } System.out.println("Mean Clear Time:" + filterClearStats.getMean()); System.out.println("size:" + BINOMIAL_COFF); for (int j = 0; j < BINOMIAL_COFF; ++j) { int value = patternArray[j]; System.out.print("value:" + value + " "); for (int i = 5; i >= 0; --i) { System.out.print(((value & (1 << i)) != 0) ? '1' : '0'); } System.out.print(" Key MSBLen:" + Integer.toString(patternKeyMSBits[j]) + "\n"); } validateGenerator(); long key1 = new BitBuilder().on(10).off(1).on(53).bits(); long key2 = new BitBuilder().on(10).off(4).on(50).bits(); long key3 = new BitBuilder().on(10).off(4).on(47).off(3).bits(); long key4 = new BitBuilder().off(10).on(4).off(47).on(3).bits(); long key5 = new BitBuilder().off(10).on(4).off(47).on(1).off(2).bits(); Assert.assertTrue(SimHash.hammingDistance(key1, key2) == 3); Assert.assertTrue(SimHash.hammingDistance(key1, key3) != 3); Assert.assertTrue(SimHash.hammingDistance(key2, key3) == 3); Assert.assertTrue(SimHash.hammingDistance(key1, key4) > 3); Assert.assertTrue(SimHash.hammingDistance(key2, key4) > 3); Assert.assertTrue(SimHash.hammingDistance(key3, key4) > 3); Assert.assertTrue(SimHash.hammingDistance(key4, key5) <= 3); ImmutableList<DeduperValue> values = new ImmutableList.Builder<DeduperValue>() .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.1"), 1000, new TextBytes("http://adomain.com/"))) .add(new DeduperValue(key2, 1001, 2001, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000, new 
TextBytes("http://bdomain.com/"))) .add(new DeduperValue(key3, 1002, 2002, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.3"), 1000, new TextBytes("http://cdomain.com/"))) .add(new DeduperValue(key4, 1003, 2003, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.4"), 1000, new TextBytes("http://ddomain.com/"))) .add(new DeduperValue(key5, 1004, 2004, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.5"), 1000, new TextBytes("http://edomain.com/"))) .build(); SimhashMatcher unionFinder = new SimhashMatcher(); final Multimap<String, Long> rootDomainToDupes = TreeMultimap.create(); // collect all json set representations ... final ArrayList<TextBytes> jsonSets = new ArrayList<TextBytes>(); unionFinder.emitMatches(3, values.iterator(), new OutputCollector<TextBytes, TextBytes>() { @Override public void collect(TextBytes key, TextBytes value) throws IOException { System.out.println("Root:" + key + " JSON: " + value.toString()); populateTestJSONSetData(rootDomainToDupes, key, value); // collect all json sets for later disjoint-set join jsonSets.add(value); } }, null); ImmutableList<Long> hashSuperSet1 = ImmutableList.of(2000L, 2001L, 2002L); ImmutableList<Long> hashSuperSet2 = ImmutableList.of(2003L, 2004L); Assert.assertTrue(rootDomainToDupes.get("adomain.com").containsAll(hashSuperSet1)); Assert.assertTrue(rootDomainToDupes.get("bdomain.com").containsAll(hashSuperSet1)); Assert.assertTrue(rootDomainToDupes.get("cdomain.com").containsAll(hashSuperSet1)); Assert.assertTrue(rootDomainToDupes.get("ddomain.com").containsAll(hashSuperSet2)); Assert.assertTrue(rootDomainToDupes.get("edomain.com").containsAll(hashSuperSet2)); ImmutableList<DeduperValue> secondSetValues = new ImmutableList.Builder<DeduperValue>() .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000, new TextBytes("http://adomain.com/"))) .add(new DeduperValue(key1, 1007, 2007, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000, new TextBytes("http://z1domain.com/"))) 
.add(new DeduperValue(key2, 1008, 2008, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000, new TextBytes("http://z2domain.com/"))) .add(new DeduperValue(key3, 1009, 2009, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000, new TextBytes("http://z3domain.com/"))) .build(); unionFinder.emitMatches(3, secondSetValues.iterator(), new OutputCollector<TextBytes, TextBytes>() { @Override public void collect(TextBytes key, TextBytes value) throws IOException { System.out.println("Root:" + key + " JSON: " + value.toString()); // collect all json sets for later disjoint-set join jsonSets.add(value); } }, null); SetUnionFinder unionFinder2 = new SetUnionFinder(); // union all json sets ... unionFinder2.union(jsonSets.iterator()); // ok emit union of sets ... unionFinder2.emit(new TextBytes("test"), new OutputCollector<TextBytes, TextBytes>() { @Override public void collect(TextBytes key, TextBytes value) throws IOException { System.out.println("Root:" + key + " JSON: " + value.toString()); } }, null); }