List of usage examples for org.apache.mahout.math.jet.random.AbstractContinousDistribution
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override//from w w w .ja v a 2s . c o m public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(mix, 100, new double[] { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 }, "mixture", false, gen); } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override/* w ww . j a v a2 s .c o m*/ public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; TDigest dist = new TDigest((double) 1000, gen); long t0 = System.nanoTime(); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { double x = mix.nextDouble(); data.add(x); dist.add(x); } System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue("Summary is too large", dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.005); double estimate = dist.quantile(q); assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { Random gen = RandomUtils.getRandom(); for (int i = 0; i < repeats(); i++) { runTest(new AbstractContinousDistribution() { double base = 0; @Override// www . j ava 2 s . com public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true, gen); } }
From source file:com.mapr.stats.BetaBinomialDistribution.java
License:Apache License
private AbstractContinousDistribution createBernoulliDistribution(final double p) { return new AbstractContinousDistribution() { @Override/*from w w w . j av a 2s .c om*/ public double nextDouble() { return gen.nextDouble() < p ? 1 : 0; } }; }
From source file:com.mapr.stats.BinomialDistributionSampler.java
License:Apache License
@Override public DistributionWithMean nextDistribution() { final double p = bd.nextDouble(); return new DistributionWithMean(new AbstractContinousDistribution() { @Override/* ww w .j av a2 s .c o m*/ public double nextDouble() { return gen.nextDouble() < p ? 1 : 0; } }, p); }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override/*from w w w . j a va 2s. c o m*/ public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(factory, mix, 100, new double[] { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 }, "mixture", false); } }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override//from w w w. j a v a 2s . c om public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; for (int run = 0; run < 3 * repeats(); run++) { TDigest dist = new ArrayDigest(32, (double) 1000); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { data.add(mix.nextDouble()); } long t0 = System.nanoTime(); for (double x : data) { dist.add(x); } dist.compress(); System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue( String.format("Summary is too large, got %d, wanted < %.1f", dist.centroidCount(), 10 * 1000.0), dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.01); double estimate = dist.quantile(q); assertEquals( String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } } }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { for (int i = 0; i < 3 * repeats(); i++) { runTest(factory, new AbstractContinousDistribution() { double base = 0; @Override//from w w w. j av a 2s . c o m public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true); } }
From source file:com.tdunning.math.stats.AVLTreeDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override/* ww w.j a v a 2 s.c om*/ public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; AVLTreeDigest dist = new AVLTreeDigest((double) 1000); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { double x = mix.nextDouble(); data.add(x); } long t0 = System.nanoTime(); for (double x : data) { dist.add(x); } System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue("Summary is too large: " + dist.centroidCount(), dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.01); double estimate = dist.quantile(q); assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } }
From source file:com.tdunning.math.stats.AVLTreeDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { for (int i = 0; i < repeats(); i++) { runTest(factory, new AbstractContinousDistribution() { double base = 0; @Override/*from ww w . java 2s. c o m*/ public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true); } }