List of usage examples for org.apache.mahout.math.jet.random.AbstractContinousDistribution
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override//from w w w .ja v a 2s . c o m public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(mix, 100, new double[] { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 }, "mixture", false, gen); } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override/* w ww . j a v a2 s .c o m*/ public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; TDigest dist = new TDigest((double) 1000, gen); long t0 = System.nanoTime(); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { double x = mix.nextDouble(); data.add(x); dist.add(x); } System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue("Summary is too large", dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.005); double estimate = dist.quantile(q); assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { Random gen = RandomUtils.getRandom(); for (int i = 0; i < repeats(); i++) { runTest(new AbstractContinousDistribution() { double base = 0; @Override// www . j ava 2 s . com public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true, gen); } }
From source file:com.mapr.stats.BetaBinomialDistribution.java
License:Apache License
private AbstractContinousDistribution createBernoulliDistribution(final double p) { return new AbstractContinousDistribution() { @Override/*from w w w . j av a 2s .c om*/ public double nextDouble() { return gen.nextDouble() < p ? 1 : 0; } }; }
From source file:com.mapr.stats.BinomialDistributionSampler.java
License:Apache License
@Override public DistributionWithMean nextDistribution() { final double p = bd.nextDouble(); return new DistributionWithMean(new AbstractContinousDistribution() { @Override/* ww w .j av a2 s .c o m*/ public double nextDouble() { return gen.nextDouble() < p ? 1 : 0; } }, p); }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override/*from w w w . j a va 2s. c o m*/ public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(factory, mix, 100, new double[] { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 }, "mixture", false); } }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override//from w w w. j a v a 2s . c om public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; for (int run = 0; run < 3 * repeats(); run++) { TDigest dist = new ArrayDigest(32, (double) 1000); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { data.add(mix.nextDouble()); } long t0 = System.nanoTime(); for (double x : data) { dist.add(x); } dist.compress(); System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue( String.format("Summary is too large, got %d, wanted < %.1f", dist.centroidCount(), 10 * 1000.0), dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.01); double estimate = dist.quantile(q); assertEquals( String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } } }
From source file:com.tdunning.math.stats.ArrayDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { for (int i = 0; i < 3 * repeats(); i++) { runTest(factory, new AbstractContinousDistribution() { double base = 0; @Override//from w w w. j av a 2s . c o m public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true); } }
From source file:com.tdunning.math.stats.AVLTreeDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override/* ww w.j a v a 2 s.c om*/ public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; AVLTreeDigest dist = new AVLTreeDigest((double) 1000); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { double x = mix.nextDouble(); data.add(x); } long t0 = System.nanoTime(); for (double x : data) { dist.add(x); } System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue("Summary is too large: " + dist.centroidCount(), dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.01); double estimate = dist.quantile(q); assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } }
From source file:com.tdunning.math.stats.AVLTreeDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { for (int i = 0; i < repeats(); i++) { runTest(factory, new AbstractContinousDistribution() { double base = 0; @Override/*from ww w . java 2s. c o m*/ public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true); } }