List of usage examples for org.apache.mahout.math.stats OnlineSummarizer getMean
public double getMean()
From source file:com.mapr.stats.bandit.BanditTrainer.java
License:Apache License
private static void printRegret(String outputFile, List<OnlineSummarizer> cumulativeRegret, List<Integer> steps) throws FileNotFoundException { try (PrintWriter out = new PrintWriter(outputFile)) { out.printf("n\tmean\n"); int k = 0; for (OnlineSummarizer summary : cumulativeRegret) { out.printf("%d\t%.4f\n", steps.get(k++), summary.getMean()); // for (int quartile = 0; quartile <= 4; quartile++) { // out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n"); // } }//from ww w . j av a 2s .co m out.flush(); } }
From source file:com.mapr.stats.bandit.EpsilonGreedy.java
License:Apache License
/** * Samples probability estimates from each bandit and picks the apparent best * * @return The index of the chosen bandit *///from w w w . j a v a 2s. com @Override public int sample() { if (gen.nextDouble() < epsilon) { return gen.nextInt(summaries.size()); } else { double max = summaries.get(0).getMean(); int i = 0; int maxIndex = 0; for (OnlineSummarizer summary : summaries) { if (summary.getMean() > max) { max = summary.getMean(); maxIndex = i; } i++; } return maxIndex; } }
From source file:com.mapr.stats.BanditTrainer.java
License:Apache License
private static void printRegret(String outputFile, List<OnlineSummarizer> cumulativeRegret, List<Integer> steps) throws FileNotFoundException { PrintWriter out = new PrintWriter(outputFile); try {//from w ww. j av a2 s. c o m out.printf("n\tmean\n"); int k = 0; for (OnlineSummarizer summary : cumulativeRegret) { out.printf("%d\t%.4f\n", steps.get(k++), summary.getMean()); // for (int quartile = 0; quartile <= 4; quartile++) { // out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n"); // } } out.flush(); } finally { out.close(); } }
From source file:com.mapr.synth.SchemaSamplerTest.java
License:Apache License
@Test public void testSequence() throws IOException { SchemaSampler s = new SchemaSampler( Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read()); OnlineSummarizer s0 = new OnlineSummarizer(); OnlineSummarizer s1 = new OnlineSummarizer(); for (int i = 0; i < 10000; i++) { JsonNode x = s.sample();//from ww w .j a v a2 s. co m s0.add(Iterables.size(x.get("c"))); s1.add(Iterables.size(x.get("d"))); for (JsonNode n : x.get("d")) { int z = n.asInt(); assertTrue(z >= 3 && z < 9); } } assertEquals(5, s0.getMean(), 1); assertEquals(10, s1.getMean(), 2); }
From source file:org.conan.mymahout.clustering.streaming.tools.ClusterQualitySummarizer.java
License:Apache License
public static void printSummaries(List<OnlineSummarizer> summarizers, String type, PrintWriter fileOut) { double maxDistance = 0; for (int i = 0; i < summarizers.size(); ++i) { OnlineSummarizer summarizer = summarizers.get(i); if (summarizer.getCount() > 1) { maxDistance = Math.max(maxDistance, summarizer.getMax()); System.out.printf("Average distance in cluster %d [%d]: %f\n", i, summarizer.getCount(), summarizer.getMean()); // If there is just one point in the cluster, quartiles cannot be estimated. We'll just assume all the quartiles // equal the only value. if (fileOut != null) { fileOut.printf("%d,%f,%f,%f,%f,%f,%f,%f,%d,%s\n", i, summarizer.getMean(), summarizer.getSD(), summarizer.getQuartile(0), summarizer.getQuartile(1), summarizer.getQuartile(2), summarizer.getQuartile(3), summarizer.getQuartile(4), summarizer.getCount(), type); }/* w w w.j a va 2s . c o m*/ } else { System.out.printf("Cluster %d is has %d data point. Need atleast 2 data points in a cluster for" + " OnlineSummarizer.\n", i, summarizer.getCount()); } } System.out.printf("Num clusters: %d; maxDistance: %f\n", summarizers.size(), maxDistance); }