List of usage examples for org.apache.mahout.math.stats OnlineSummarizer add
public void add(double sample)
From source file:com.mapr.stats.bandit.BanditTrainer.java
License:Apache License
/** * Computes average regret relative to perfect knowledge given uniform random probabilities. The * output contains the quartiles for different numbers of trials. The quartiles are computed by * running many experiments for each specified number of trials. * <p/>/*from w w w . j a v a2s .c o m*/ * This can be plotted pretty much directly in R * <pre> * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv') * > bxp(list(com.mapr.stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n)) * </pre> * * @param outputFile Where to put the output * @param sizes The different size experiments to use * @param replications Number of times to repeat the experiment * @param bandits How many bandits to simulate * @return Returns the average regret per trial * @throws java.io.FileNotFoundException If the output file can't be opened due to a missing directory. */ public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits) throws FileNotFoundException { try (PrintWriter out = new PrintWriter(outputFile)) { double finalMedianRegret = 0; Random gen = new Random(); out.printf("n\tq0\tq1\tq2\tq3\tq4\n"); // for each horizon time span of interest for (int n : sizes) { System.out.printf("%d\n", n); OnlineSummarizer summary = new OnlineSummarizer(); // replicate the test many times for (int j = 0; j < replications; j++) { // pick probabilities at random double[] p = new double[bandits]; for (int k = 0; k < bandits; k++) { p[k] = gen.nextDouble(); } // order them to make error interpretation easier Arrays.sort(p); BetaBayesModel s = new BetaBayesModel(bandits, RandomUtils.getRandom()); int wins = 0; for (int i = 0; i < n; i++) { int k = s.sample(); final double u = gen.nextDouble(); boolean r = u <= p[k]; wins += r ? 1 : 0; s.train(k, r ? 1 : 0); } summary.add((double) wins / n - p[bandits - 1]); } out.printf("%d\t", n); for (int quartile = 0; quartile <= 4; quartile++) { out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? 
"\t" : "\n"); } out.flush(); finalMedianRegret = summary.getMedian(); // System.out.printf("%.3f\n", summary.getMean()); } return finalMedianRegret; } }
From source file:com.mapr.stats.bandit.EpsilonGreedy.java
License:Apache License
/**
 * Builds an instance that keeps one {@code OnlineSummarizer} per bandit.
 * Each summarizer receives a single sample of 1 before it is used for anything else.
 *
 * @param bandits number of summarizers to create
 * @param epsilon value stored in the {@code epsilon} field
 * @param gen     random source stored in the {@code gen} field
 */
public EpsilonGreedy(int bandits, double epsilon, Random gen) {
    this.epsilon = epsilon;
    this.gen = gen;
    summaries = Lists.newArrayList();
    for (int remaining = bandits; remaining > 0; remaining--) {
        final OnlineSummarizer seeded = new OnlineSummarizer();
        // NOTE(review): the initial sample of 1 looks like an optimistic prior — confirm
        seeded.add(1);
        summaries.add(seeded);
    }
}
From source file:com.mapr.stats.BanditTrainer.java
License:Apache License
/** * Computes average regret relative to perfect knowledge given uniform random probabilities. The * output contains the quartiles for different numbers of trials. The quartiles are computed by * running many experiments for each specified number of trials. * <p/>/*from w ww .jav a 2 s.c om*/ * This can be plotted pretty much directly in R * <pre> * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv') * > bxp(list(stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n)) * </pre> * * @param outputFile Where to put the output * @param sizes The different size experiments to use * @param replications Number of times to repeat the experiment * @param bandits How many bandits to simulate * @throws FileNotFoundException If the output file can't be opened due to a missing directory. * @return Returns the average regret per trial */ public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits) throws FileNotFoundException { PrintWriter out = new PrintWriter(outputFile); try { double finalMedianRegret = 0; Random gen = new Random(); out.printf("n\tq0\tq1\tq2\tq3\tq4\n"); // for each horizon time span of interest for (int n : sizes) { System.out.printf("%d\n", n); OnlineSummarizer summary = new OnlineSummarizer(); // replicate the test many times for (int j = 0; j < replications; j++) { // pick probabilities at random double[] p = new double[bandits]; for (int k = 0; k < bandits; k++) { p[k] = gen.nextDouble(); } // order them to make error interpretation easier Arrays.sort(p); BetaBayesModel s = new BetaBayesModel(bandits, new MersenneTwisterRNG()); int wins = 0; for (int i = 0; i < n; i++) { int k = s.sample(); final double u = gen.nextDouble(); boolean r = u <= p[k]; wins += r ? 1 : 0; s.train(k, r ? 1 : 0); } summary.add((double) wins / n - p[bandits - 1]); } out.printf("%d\t", n); for (int quartile = 0; quartile <= 4; quartile++) { out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? 
"\t" : "\n"); } out.flush(); finalMedianRegret = summary.getMedian(); // System.out.printf("%.3f\n", summary.getMean()); } return finalMedianRegret; } finally { out.close(); } }
From source file:com.mapr.synth.SchemaSamplerTest.java
License:Apache License
/**
 * Draws 10000 samples from the sampler built on {@code schema005.json} and checks that
 * every element of field "d" lies in [3, 9), that the mean size of field "c" is within 1
 * of 5, and that the mean size of field "d" is within 2 of 10.
 */
@Test
public void testSequence() throws IOException {
    String schema = Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read();
    SchemaSampler sampler = new SchemaSampler(schema);

    OnlineSummarizer cSizes = new OnlineSummarizer();
    OnlineSummarizer dSizes = new OnlineSummarizer();

    int remaining = 10000;
    while (remaining-- > 0) {
        JsonNode record = sampler.sample();
        cSizes.add(Iterables.size(record.get("c")));
        dSizes.add(Iterables.size(record.get("d")));

        // every element of "d" must fall in the half-open range [3, 9)
        for (JsonNode element : record.get("d")) {
            int value = element.asInt();
            assertTrue(value >= 3 && value < 9);
        }
    }

    assertEquals(5, cSizes.getMean(), 1);
    assertEquals(10, dSizes.getMean(), 2);
}