List of usage examples for the org.apache.mahout.math.stats.OnlineSummarizer constructor OnlineSummarizer()
OnlineSummarizer
From source file:SimpleCsvExamples.java
License:Apache License
public static void main(String[] args) throws IOException { FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS]; for (int i = 0; i < FIELDS; i++) { encoder[i] = new ConstantValueEncoder("v" + 1); }//ww w . j a v a 2 s . c om OnlineSummarizer[] s = new OnlineSummarizer[FIELDS]; for (int i = 0; i < FIELDS; i++) { s[i] = new OnlineSummarizer(); } long t0 = System.currentTimeMillis(); Vector v = new DenseVector(1000); if ("--generate".equals(args[0])) { PrintWriter out = new PrintWriter( new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8)); try { int n = Integer.parseInt(args[1]); for (int i = 0; i < n; i++) { Line x = Line.generate(); out.println(x); } } finally { Closeables.close(out, false); } } else if ("--parse".equals(args[0])) { BufferedReader in = Files.newReader(new File(args[1]), Charsets.UTF_8); double total = 0; try { String line = in.readLine(); while (line != null) { v.assign(0); Line x = new Line(line); for (int i = 0; i < FIELDS; i++) { double z = x.getDouble(i); total += z; //s[i].add(x.getDouble(i)); encoder[i].addToVector(x.get(i), v); } line = in.readLine(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } else if ("--fast".equals(args[0])) { FastLineReader in = new FastLineReader(new FileInputStream(args[1])); double total = 0; try { FastLine line = in.read(); while (line != null) { v.assign(0); for (int i = 0; i < FIELDS; i++) { double z = line.getDouble(i); total += z; //s[i].add(z); encoder[i].addToVector((byte[]) null, z, v); } line = in.read(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } System.out.printf("\nElapsed time = %.3f%n", 
(System.currentTimeMillis() - t0) / 1000.0); }
From source file:com.mapr.stats.bandit.BanditTrainer.java
License:Apache License
/** * Computes average regret relative to perfect knowledge given uniform random probabilities. The * output contains the quartiles for different numbers of trials. The quartiles are computed by * running many experiments for each specified number of trials. * <p/>/*from w w w .j a va 2 s . co m*/ * This can be plotted pretty much directly in R * <pre> * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv') * > bxp(list(com.mapr.stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n)) * </pre> * * @param outputFile Where to put the output * @param sizes The different size experiments to use * @param replications Number of times to repeat the experiment * @param bandits How many bandits to simulate * @return Returns the average regret per trial * @throws java.io.FileNotFoundException If the output file can't be opened due to a missing directory. */ public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits) throws FileNotFoundException { try (PrintWriter out = new PrintWriter(outputFile)) { double finalMedianRegret = 0; Random gen = new Random(); out.printf("n\tq0\tq1\tq2\tq3\tq4\n"); // for each horizon time span of interest for (int n : sizes) { System.out.printf("%d\n", n); OnlineSummarizer summary = new OnlineSummarizer(); // replicate the test many times for (int j = 0; j < replications; j++) { // pick probabilities at random double[] p = new double[bandits]; for (int k = 0; k < bandits; k++) { p[k] = gen.nextDouble(); } // order them to make error interpretation easier Arrays.sort(p); BetaBayesModel s = new BetaBayesModel(bandits, RandomUtils.getRandom()); int wins = 0; for (int i = 0; i < n; i++) { int k = s.sample(); final double u = gen.nextDouble(); boolean r = u <= p[k]; wins += r ? 1 : 0; s.train(k, r ? 1 : 0); } summary.add((double) wins / n - p[bandits - 1]); } out.printf("%d\t", n); for (int quartile = 0; quartile <= 4; quartile++) { out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? 
"\t" : "\n"); } out.flush(); finalMedianRegret = summary.getMedian(); // System.out.printf("%.3f\n", summary.getMean()); } return finalMedianRegret; } }
From source file:com.mapr.stats.bandit.BanditTrainer.java
License:Apache License
/** * Computes average regret relative to perfect knowledge given uniform random probabilities. The * output contains the quartiles for different numbers of trials. The quartiles are computed by * running many experiments for each specified number of trials. * <p/>//from w w w .j a va 2 s. c om * This can be plotted pretty much directly in R * <pre> * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv') * > bxp(list(com.mapr.stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n)) * </pre> * * @param cumulativeOutput Where to write the cumulative regret results * @param perTurnOutput Where to write the per step regret results * @param replications How many times to replicate the experiment * @param bandits How many bandits to emulate * @param maxSteps Maximum number of trials to run per experiment * @param modelFactory How to construct the solver. * @param refSampler How to get reward distributions for bandits * @return An estimate of the average final cumulative regret * @throws java.io.FileNotFoundException If the output file can't be opened due to * a missing directory. 
*/ public static double totalRegret(String cumulativeOutput, String perTurnOutput, int replications, int bandits, int maxSteps, BanditFactory modelFactory, DistributionGenerator refSampler) throws FileNotFoundException { List<OnlineSummarizer> cumulativeRegret = Lists.newArrayList(); List<OnlineSummarizer> localRegret = Lists.newArrayList(); List<Integer> steps = Lists.newArrayList(); List<Integer> localSteps = Lists.newArrayList(); Random gen = new Random(); // for each horizon time span of interest for (int j = 0; j < replications; j++) { BayesianBandit s = modelFactory.createBandit(bandits, gen); List<DistributionWithMean> refs = Lists.newArrayList(); for (int k = 0; k < bandits; k++) { refs.add(refSampler.nextDistribution()); } Collections.sort(refs); double wins = 0; int k = 0; int delta = 1; double totalRegret = 0; for (int i = 0; i < maxSteps; i++) { if (i > 50 * delta) { delta = bump(delta); } int choice = s.sample(); double r = refs.get(choice).nextDouble(); totalRegret += refs.get(bandits - 1).getMean() - refs.get(choice).getMean(); if ((i + 1) % delta == 0) { if (cumulativeRegret.size() <= k) { cumulativeRegret.add(new OnlineSummarizer()); steps.add(i + 1); } cumulativeRegret.get(k).add(totalRegret); k++; } if (localRegret.size() <= i / BUCKET_SIZE) { localRegret.add(new OnlineSummarizer()); localSteps.add(i); } double thisTrialRegret = refs.get(bandits - 1).getMean() - refs.get(choice).getMean(); localRegret.get(i / BUCKET_SIZE).add(thisTrialRegret); wins += r; s.train(choice, r); } } printRegret(cumulativeOutput, cumulativeRegret, steps); printRegret(perTurnOutput, localRegret, localSteps); return cumulativeRegret.get(cumulativeRegret.size() - 1).getMedian(); }
From source file:com.mapr.stats.bandit.EpsilonGreedy.java
License:Apache License
/**
 * Creates an epsilon-greedy bandit with one summarizer per bandit.
 *
 * <p>Every summarizer starts with a single recorded value of 1.
 * NOTE(review): presumably an optimistic prior so untried bandits get explored — confirm.
 *
 * @param bandits number of bandits to track
 * @param epsilon value stored in the {@code epsilon} field
 * @param gen     random number generator stored in the {@code gen} field
 */
public EpsilonGreedy(int bandits, double epsilon, Random gen) {
    this.epsilon = epsilon;
    this.gen = gen;

    summaries = Lists.newArrayList();
    int created = 0;
    while (created < bandits) {
        final OnlineSummarizer seeded = new OnlineSummarizer();
        seeded.add(1);
        summaries.add(seeded);
        created++;
    }
}
From source file:com.mapr.stats.BanditTrainer.java
License:Apache License
/** * Computes average regret relative to perfect knowledge given uniform random probabilities. The * output contains the quartiles for different numbers of trials. The quartiles are computed by * running many experiments for each specified number of trials. * <p/>/*from ww w. j a va 2s . com*/ * This can be plotted pretty much directly in R * <pre> * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv') * > bxp(list(stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n)) * </pre> * * @param outputFile Where to put the output * @param sizes The different size experiments to use * @param replications Number of times to repeat the experiment * @param bandits How many bandits to simulate * @throws FileNotFoundException If the output file can't be opened due to a missing directory. * @return Returns the average regret per trial */ public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits) throws FileNotFoundException { PrintWriter out = new PrintWriter(outputFile); try { double finalMedianRegret = 0; Random gen = new Random(); out.printf("n\tq0\tq1\tq2\tq3\tq4\n"); // for each horizon time span of interest for (int n : sizes) { System.out.printf("%d\n", n); OnlineSummarizer summary = new OnlineSummarizer(); // replicate the test many times for (int j = 0; j < replications; j++) { // pick probabilities at random double[] p = new double[bandits]; for (int k = 0; k < bandits; k++) { p[k] = gen.nextDouble(); } // order them to make error interpretation easier Arrays.sort(p); BetaBayesModel s = new BetaBayesModel(bandits, new MersenneTwisterRNG()); int wins = 0; for (int i = 0; i < n; i++) { int k = s.sample(); final double u = gen.nextDouble(); boolean r = u <= p[k]; wins += r ? 1 : 0; s.train(k, r ? 1 : 0); } summary.add((double) wins / n - p[bandits - 1]); } out.printf("%d\t", n); for (int quartile = 0; quartile <= 4; quartile++) { out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? 
"\t" : "\n"); } out.flush(); finalMedianRegret = summary.getMedian(); // System.out.printf("%.3f\n", summary.getMean()); } return finalMedianRegret; } finally { out.close(); } }
From source file:com.mapr.synth.SchemaSamplerTest.java
License:Apache License
/**
 * Samples 10000 records from the schema in {@code schema005.json} and checks the two
 * sequence-valued fields.
 *
 * <p>Every element of field {@code d} must lie in the range [3, 9). The mean number of
 * elements must be within 1 of 5 for field {@code c} and within 2 of 10 for field {@code d}.
 *
 * @throws IOException if the schema resource cannot be read
 */
@Test
public void testSequence() throws IOException {
    String schema = Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read();
    SchemaSampler sampler = new SchemaSampler(schema);

    OnlineSummarizer lengthOfC = new OnlineSummarizer();
    OnlineSummarizer lengthOfD = new OnlineSummarizer();

    for (int round = 0; round < 10000; round++) {
        JsonNode record = sampler.sample();
        lengthOfC.add(Iterables.size(record.get("c")));

        JsonNode dValues = record.get("d");
        lengthOfD.add(Iterables.size(dValues));
        for (JsonNode element : dValues) {
            int value = element.asInt();
            assertTrue(3 <= value && value < 9);
        }
    }

    assertEquals(5, lengthOfC.getMean(), 1);
    assertEquals(10, lengthOfD.getMean(), 2);
}