Example usage for the org.apache.mahout.math.jet.random.AbstractContinousDistribution constructor

List of usage examples for the org.apache.mahout.math.jet.random.AbstractContinousDistribution constructor

Introduction

On this page you can find example usages of the org.apache.mahout.math.jet.random.AbstractContinousDistribution constructor.

Prototype

AbstractContinousDistribution

Source Link

Usage

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check against a mixture of a uniform and a normal
 * distribution. The mixture has a very narrow peak centered near the median; the system under
 * test is expected to be scale invariant and to cope with it regardless.
 */
@Test
public void testNarrowNormal() {
    final Random gen = RandomUtils.getRandom();

    // Every draw first consults the shared generator: below 0.5 the sample comes from the
    // uniform component, otherwise from the normal component.
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        private final AbstractContinousDistribution normal = new Normal(0, 1e-5, gen);
        private final AbstractContinousDistribution uniform = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            return gen.nextDouble() < 0.5 ? uniform.nextDouble() : normal.nextDouble();
        }
    };

    for (int round = 0; round < repeats(); round++) {
        // A fresh quantile array is handed to each round.
        double[] quantiles = { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 };
        runTest(mix, 100, quantiles, "mixture", false, gen);
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Checks the digest when the input consists of a few heavily repeated values: every sample is
 * a multiple of 0.1 between 0 and 1.0.
 *
 * <p>After 100000 samples the test prints the elapsed time per point and the centroid count,
 * asserts that the centroid count stays below 10x the compression setting, and then verifies
 * {@code cdf} and {@code quantile} at probe points between the repeated values.
 */
@Test
public void testRepeatedValues() {
    final Random gen = RandomUtils.getRandom();

    // 0 and 1.0 each receive 5% of the samples; each of the values 0.1 through 0.9 receives 10%
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            return Math.rint(gen.nextDouble() * 10) / 10.0;
        }
    };

    TDigest dist = new TDigest((double) 1000, gen);
    long t0 = System.nanoTime();
    // Samples go straight into the digest. Nothing in this test reads them back, so they are
    // not collected in a side list (which would only add boxing work to the timed loop).
    for (int i1 = 0; i1 < 100000; i1++) {
        dist.add(mix.nextDouble());
    }

    // The reported time covers both drawing the sample and adding it to the digest.
    System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000);
    System.out.printf("# %d centroids\n", dist.centroidCount());

    // I would be happier with 5x compression, but repeated values make things kind of weird
    assertTrue("Summary is too large", dist.centroidCount() < 10 * (double) 1000);

    // all quantiles should round to nearest actual value
    for (int i = 0; i < 10; i++) {
        double z = i / 10.0;
        // we skip over troublesome points that are nearly halfway between
        for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) {
            double q = z + delta;
            double cdf = dist.cdf(q);
            // we also relax the tolerances for repeated values
            assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.005);

            double estimate = dist.quantile(q);
            assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate),
                    Math.rint(q * 10) / 10.0, estimate, 0.001);
        }
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check against a source whose values grow by a fixed step
 * of {@code Math.PI * 1e-5} on every call.
 */
@Test
public void testSequentialPoints() {
    Random gen = RandomUtils.getRandom();
    for (int round = 0; round < repeats(); round++) {
        // A new source is built for every round, so each round restarts from the first step.
        AbstractContinousDistribution ascending = new AbstractContinousDistribution() {
            private double current = 0;

            @Override
            public double nextDouble() {
                current += Math.PI * 1e-5;
                return current;
            }
        };
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(ascending, 100, quantiles, "sequential", true, gen);
    }
}

From source file:com.mapr.stats.BetaBinomialDistribution.java

License:Apache License

/**
 * Builds a sampler that only ever produces 0 or 1.
 *
 * @param p threshold compared against each draw from the enclosing {@code gen}
 * @return a distribution whose {@code nextDouble()} yields 1 when the draw is below {@code p}
 *         and 0 otherwise
 */
private AbstractContinousDistribution createBernoulliDistribution(final double p) {
    return new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            if (gen.nextDouble() < p) {
                return 1;
            }
            return 0;
        }
    };
}

From source file:com.mapr.stats.BinomialDistributionSampler.java

License:Apache License

/**
 * Draws a value {@code p} from {@code bd} and wraps a 0/1 sampler together with that value.
 *
 * @return a {@code DistributionWithMean} built from the sampler and {@code p}; the sampler
 *         yields 1 when a draw from {@code gen} is below {@code p} and 0 otherwise
 */
@Override
public DistributionWithMean nextDistribution() {
    final double p = bd.nextDouble();

    AbstractContinousDistribution zeroOrOne = new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            if (gen.nextDouble() < p) {
                return 1;
            }
            return 0;
        }
    };

    return new DistributionWithMean(zeroOrOne, p);
}

From source file:com.tdunning.math.stats.ArrayDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check (using {@code factory}) against a mixture of a uniform
 * and a normal distribution. The mixture has a very narrow peak centered near the median; the
 * system under test is expected to be scale invariant and to cope with it regardless.
 */
@Test
public void testNarrowNormal() {
    final Random gen = RandomUtils.getRandom();

    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        private final AbstractContinousDistribution normal = new Normal(0, 1e-5, gen);
        private final AbstractContinousDistribution uniform = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            // The shared generator decides which component supplies this sample.
            boolean pickUniform = gen.nextDouble() < 0.5;
            if (pickUniform) {
                return uniform.nextDouble();
            }
            return normal.nextDouble();
        }
    };

    for (int round = 0; round < repeats(); round++) {
        double[] quantiles = { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 };
        runTest(factory, mix, 100, quantiles, "mixture", false);
    }
}

From source file:com.tdunning.math.stats.ArrayDigestTest.java

License:Apache License

/**
 * Checks an {@code ArrayDigest} when the input consists of a few heavily repeated values:
 * every sample is a multiple of 0.1 between 0 and 1.0.
 *
 * <p>The whole scenario is repeated {@code 3 * repeats()} times. In each run the samples are
 * generated up front, then added and compressed under a timer; the test prints the elapsed
 * time per point and the centroid count, bounds the centroid count, and verifies {@code cdf}
 * and {@code quantile} at probe points between the repeated values.
 */
@Test
public void testRepeatedValues() {
    final Random gen = RandomUtils.getRandom();

    // 0 and 1.0 each receive 5% of the samples; each of the values 0.1 through 0.9 receives 10%
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            double tenths = Math.rint(gen.nextDouble() * 10);
            return tenths / 10.0;
        }
    };

    final int sampleCount = 100000;
    final double compression = 1000;
    final double centroidLimit = 10 * compression;
    // Probe offsets skip the troublesome points that are nearly halfway between two values.
    final double[] offsets = { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 };

    for (int run = 0; run < 3 * repeats(); run++) {
        TDigest dist = new ArrayDigest(32, compression);

        // Draw all samples before starting the clock.
        List<Double> data = Lists.newArrayList();
        for (int k = 0; k < sampleCount; k++) {
            data.add(mix.nextDouble());
        }

        long t0 = System.nanoTime();
        for (double sample : data) {
            dist.add(sample);
        }
        dist.compress();

        System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / sampleCount);
        System.out.printf("# %d centroids\n", dist.centroidCount());

        // 5x compression would be nicer, but repeated values make things kind of weird
        String sizeMessage = String.format("Summary is too large, got %d, wanted < %.1f", dist.centroidCount(),
                centroidLimit);
        assertTrue(sizeMessage, dist.centroidCount() < centroidLimit);

        // every quantile should round to the nearest value that actually occurs
        for (int step = 0; step < 10; step++) {
            double z = step / 10.0;
            for (double delta : offsets) {
                double q = z + delta;

                // tolerances are relaxed for repeated values
                double cdf = dist.cdf(q);
                String cdfMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf);
                assertEquals(cdfMessage, z + 0.05, cdf, 0.01);

                double estimate = dist.quantile(q);
                String quantileMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q,
                        cdf, estimate);
                assertEquals(quantileMessage, Math.rint(q * 10) / 10.0, estimate, 0.001);
            }
        }
    }
}

From source file:com.tdunning.math.stats.ArrayDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check (using {@code factory}) {@code 3 * repeats()} times
 * against a source whose values grow by a fixed step of {@code Math.PI * 1e-5} on every call.
 */
@Test
public void testSequentialPoints() {
    for (int round = 0; round < 3 * repeats(); round++) {
        // A new source is built for every round, so each round restarts from the first step.
        AbstractContinousDistribution ascending = new AbstractContinousDistribution() {
            private double current = 0;

            @Override
            public double nextDouble() {
                current += Math.PI * 1e-5;
                return current;
            }
        };
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(factory, ascending, 100, quantiles, "sequential", true);
    }
}

From source file:com.tdunning.math.stats.AVLTreeDigestTest.java

License:Apache License

/**
 * Checks an {@code AVLTreeDigest} when the input consists of a few heavily repeated values:
 * every sample is a multiple of 0.1 between 0 and 1.0.
 *
 * <p>Samples are generated up front and then added under a timer; the test prints the elapsed
 * time per point and the centroid count, bounds the centroid count, and verifies {@code cdf}
 * and {@code quantile} at probe points between the repeated values.
 */
@Test
public void testRepeatedValues() {
    final Random gen = RandomUtils.getRandom();

    // 0 and 1.0 each receive 5% of the samples; each of the values 0.1 through 0.9 receives 10%
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            double tenths = Math.rint(gen.nextDouble() * 10);
            return tenths / 10.0;
        }
    };

    final int sampleCount = 100000;
    final double compression = 1000;
    // Probe offsets skip the troublesome points that are nearly halfway between two values.
    final double[] offsets = { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 };

    AVLTreeDigest dist = new AVLTreeDigest(compression);

    // Draw all samples before starting the clock.
    List<Double> data = Lists.newArrayList();
    for (int k = 0; k < sampleCount; k++) {
        data.add(mix.nextDouble());
    }

    long t0 = System.nanoTime();
    for (double sample : data) {
        dist.add(sample);
    }

    System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / sampleCount);
    System.out.printf("# %d centroids\n", dist.centroidCount());

    // 5x compression would be nicer, but repeated values make things kind of weird
    String sizeMessage = "Summary is too large: " + dist.centroidCount();
    assertTrue(sizeMessage, dist.centroidCount() < 10 * compression);

    // every quantile should round to the nearest value that actually occurs
    for (int step = 0; step < 10; step++) {
        double z = step / 10.0;
        for (double delta : offsets) {
            double q = z + delta;

            // tolerances are relaxed for repeated values
            double cdf = dist.cdf(q);
            String cdfMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf);
            assertEquals(cdfMessage, z + 0.05, cdf, 0.01);

            double estimate = dist.quantile(q);
            String quantileMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf,
                    estimate);
            assertEquals(quantileMessage, Math.rint(q * 10) / 10.0, estimate, 0.001);
        }
    }
}

From source file:com.tdunning.math.stats.AVLTreeDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check (using {@code factory}) against a source whose values
 * grow by a fixed step of {@code Math.PI * 1e-5} on every call.
 */
@Test
public void testSequentialPoints() {
    for (int round = 0; round < repeats(); round++) {
        // A new source is built for every round, so each round restarts from the first step.
        AbstractContinousDistribution ascending = new AbstractContinousDistribution() {
            private double current = 0;

            @Override
            public double nextDouble() {
                current += Math.PI * 1e-5;
                return current;
            }
        };
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(factory, ascending, 100, quantiles, "sequential", true);
    }
}