Example usage of org.apache.mahout.math.stats.OnlineSummarizer.add

Introduction

This page collects example usages of the add method of org.apache.mahout.math.stats.OnlineSummarizer.

Prototype

public void add(double sample)

Source Link

Usage

From source file:com.mapr.stats.bandit.BanditTrainer.java

License:Apache License

/**
 * Computes average regret relative to perfect knowledge given uniform random probabilities. The
 * output contains the quartiles for different numbers of trials.  The quartiles are computed by
 * running many experiments for each specified number of trials.
 * <p/>/*from   w  w w  .  j a  v  a2s .c o m*/
 * This can be plotted pretty much directly in R
 * <pre>
 * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv')
 * > bxp(list(com.mapr.stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n))
 * </pre>
 *
 * @param outputFile   Where to put the output
 * @param sizes        The different size experiments to use
 * @param replications Number of times to repeat the experiment
 * @param bandits      How many bandits to simulate
 * @return Returns the average regret per trial
 * @throws java.io.FileNotFoundException If the output file can't be opened due to a missing directory.
 */
public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits)
        throws FileNotFoundException {

    try (PrintWriter out = new PrintWriter(outputFile)) {
        double finalMedianRegret = 0;
        Random gen = new Random();
        out.printf("n\tq0\tq1\tq2\tq3\tq4\n");
        // for each horizon time span of interest
        for (int n : sizes) {
            System.out.printf("%d\n", n);
            OnlineSummarizer summary = new OnlineSummarizer();
            // replicate the test many times
            for (int j = 0; j < replications; j++) {
                // pick probabilities at random

                double[] p = new double[bandits];
                for (int k = 0; k < bandits; k++) {
                    p[k] = gen.nextDouble();
                }

                // order them to make error interpretation easier
                Arrays.sort(p);
                BetaBayesModel s = new BetaBayesModel(bandits, RandomUtils.getRandom());
                int wins = 0;
                for (int i = 0; i < n; i++) {
                    int k = s.sample();
                    final double u = gen.nextDouble();
                    boolean r = u <= p[k];
                    wins += r ? 1 : 0;
                    s.train(k, r ? 1 : 0);
                }
                summary.add((double) wins / n - p[bandits - 1]);
            }
            out.printf("%d\t", n);
            for (int quartile = 0; quartile <= 4; quartile++) {
                out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n");
            }
            out.flush();
            finalMedianRegret = summary.getMedian();

            //      System.out.printf("%.3f\n", summary.getMean());
        }
        return finalMedianRegret;
    }
}

From source file:com.mapr.stats.bandit.EpsilonGreedy.java

License:Apache License

/**
 * Creates an epsilon-greedy bandit with one summarizer per arm.
 * Every summarizer starts out holding a single sample of value 1.
 * NOTE(review): the initial sample looks like an optimistic starting estimate - confirm.
 *
 * @param bandits number of arms, i.e. how many summarizers are created
 * @param epsilon value stored in the {@code epsilon} field
 * @param gen     random number generator stored in the {@code gen} field
 */
public EpsilonGreedy(int bandits, double epsilon, Random gen) {
    this.epsilon = epsilon;
    this.gen = gen;
    summaries = Lists.newArrayList();

    int created = 0;
    while (created < bandits) {
        OnlineSummarizer armSummary = new OnlineSummarizer();
        // seed the arm with one sample before registering it
        armSummary.add(1);
        summaries.add(armSummary);
        created++;
    }
}

From source file:com.mapr.stats.BanditTrainer.java

License:Apache License

/**
 * Computes average regret relative to perfect knowledge given uniform random probabilities. The
 * output contains the quartiles for different numbers of trials.  The quartiles are computed by
 * running many experiments for each specified number of trials.
 * <p/>/*from w  ww .jav a 2 s.c om*/
 * This can be plotted pretty much directly in R
 * <pre>
 * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv')
 * > bxp(list(stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n))
 * </pre>
 *
 * @param outputFile   Where to put the output
 * @param sizes        The different size experiments to use
 * @param replications Number of times to repeat the experiment
 * @param bandits      How many bandits to simulate
 * @throws FileNotFoundException If the output file can't be opened due to a missing directory.
 * @return Returns the average regret per trial
 */
public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits)
        throws FileNotFoundException {

    PrintWriter out = new PrintWriter(outputFile);
    try {
        double finalMedianRegret = 0;
        Random gen = new Random();
        out.printf("n\tq0\tq1\tq2\tq3\tq4\n");
        // for each horizon time span of interest
        for (int n : sizes) {
            System.out.printf("%d\n", n);
            OnlineSummarizer summary = new OnlineSummarizer();
            // replicate the test many times
            for (int j = 0; j < replications; j++) {
                // pick probabilities at random

                double[] p = new double[bandits];
                for (int k = 0; k < bandits; k++) {
                    p[k] = gen.nextDouble();
                }

                // order them to make error interpretation easier
                Arrays.sort(p);
                BetaBayesModel s = new BetaBayesModel(bandits, new MersenneTwisterRNG());
                int wins = 0;
                for (int i = 0; i < n; i++) {
                    int k = s.sample();
                    final double u = gen.nextDouble();
                    boolean r = u <= p[k];
                    wins += r ? 1 : 0;
                    s.train(k, r ? 1 : 0);
                }
                summary.add((double) wins / n - p[bandits - 1]);
            }
            out.printf("%d\t", n);
            for (int quartile = 0; quartile <= 4; quartile++) {
                out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n");
            }
            out.flush();
            finalMedianRegret = summary.getMedian();

            //      System.out.printf("%.3f\n", summary.getMean());
        }
        return finalMedianRegret;
    } finally {
        out.close();
    }
}

From source file:com.mapr.synth.SchemaSamplerTest.java

License:Apache License

/**
 * Draws 10000 samples from the schema in {@code schema005.json} and checks that the "c" and "d"
 * sequences have the expected mean lengths (about 5 and about 10) and that every element of "d"
 * lies in the half-open range [3, 9).
 */
@Test
public void testSequence() throws IOException {
    String schema = Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read();
    SchemaSampler sampler = new SchemaSampler(schema);

    OnlineSummarizer lengthOfC = new OnlineSummarizer();
    OnlineSummarizer lengthOfD = new OnlineSummarizer();

    int remaining = 10000;
    while (remaining-- > 0) {
        JsonNode record = sampler.sample();
        JsonNode c = record.get("c");
        lengthOfC.add(Iterables.size(c));
        lengthOfD.add(Iterables.size(record.get("d")));

        // every value in "d" must fall inside [3, 9)
        for (JsonNode element : record.get("d")) {
            int value = element.asInt();
            assertTrue(!(value < 3 || value >= 9));
        }
    }

    assertEquals(5, lengthOfC.getMean(), 1);
    assertEquals(10, lengthOfD.getMean(), 2);
}