Example usage for org.apache.mahout.math.stats OnlineSummarizer getMean

List of usage examples for org.apache.mahout.math.stats OnlineSummarizer getMean

Introduction

This page collects example usages of the getMean() method of org.apache.mahout.math.stats.OnlineSummarizer.

Prototype

public double getMean() 

Source Link

Usage

From source file:com.mapr.stats.bandit.BanditTrainer.java

License:Apache License

/**
 * Writes a two-column, tab-separated table to {@code outputFile}: a header line
 * {@code n<TAB>mean}, then one row per entry of {@code cumulativeRegret} that pairs the
 * entry's mean (four decimal places) with the element of {@code steps} at the same position.
 *
 * @param outputFile       name of the file to write, handed to {@link PrintWriter}
 * @param cumulativeRegret summarizers whose means are written, in iteration order
 * @param steps            first-column values, read by position; needs at least as many
 *                         elements as {@code cumulativeRegret}
 * @throws FileNotFoundException declared because {@code new PrintWriter(String)} may throw it
 */
private static void printRegret(String outputFile, List<OnlineSummarizer> cumulativeRegret, List<Integer> steps)
        throws FileNotFoundException {
    try (PrintWriter writer = new PrintWriter(outputFile)) {
        writer.printf("n\tmean\n");
        int row = 0;
        for (OnlineSummarizer regret : cumulativeRegret) {
            // Kept boxed so the element reaches printf exactly as it is stored in the list.
            Integer step = steps.get(row);
            row++;
            writer.printf("%d\t%.4f\n", step, regret.getMean());
        }
        writer.flush();
    }
}

From source file:com.mapr.stats.bandit.EpsilonGreedy.java

License:Apache License

/**
 * Chooses a bandit. If the next {@code gen.nextDouble()} draw is below {@code epsilon}, the
 * result is whatever index {@code gen.nextInt(summaries.size())} produces; otherwise it is the
 * index of the summary with the largest mean. Ties go to the earliest index, because only a
 * strictly larger mean replaces the current best.
 *
 * @return The index of the chosen bandit
 */
@Override
public int sample() {
    boolean explore = gen.nextDouble() < epsilon;
    if (explore) {
        return gen.nextInt(summaries.size());
    }

    // Exploit: scan every summary, starting from the first one as the provisional best.
    int bestIndex = 0;
    double bestMean = summaries.get(0).getMean();
    int position = 0;
    for (OnlineSummarizer candidate : summaries) {
        if (candidate.getMean() > bestMean) {
            bestMean = candidate.getMean();
            bestIndex = position;
        }
        position++;
    }
    return bestIndex;
}

From source file:com.mapr.stats.BanditTrainer.java

License:Apache License

/**
 * Writes the regret table to {@code outputFile} as tab-separated text: a header line
 * {@code n<TAB>mean}, followed by one line per summarizer containing the matching entry of
 * {@code steps} and the summarizer's mean formatted to four decimal places.
 *
 * @param outputFile       name of the file to create or overwrite
 * @param cumulativeRegret summarizers to report, in iteration order
 * @param steps            step counts, looked up by position; must hold at least as many
 *                         elements as {@code cumulativeRegret}
 * @throws FileNotFoundException declared because {@code new PrintWriter(String)} may throw it
 */
private static void printRegret(String outputFile, List<OnlineSummarizer> cumulativeRegret, List<Integer> steps)
        throws FileNotFoundException {
    // try-with-resources closes the writer on every exit path; closing also flushes it,
    // so no explicit flush() is needed.
    try (PrintWriter out = new PrintWriter(outputFile)) {
        out.printf("n\tmean\n");
        int k = 0;
        for (OnlineSummarizer summary : cumulativeRegret) {
            out.printf("%d\t%.4f\n", steps.get(k++), summary.getMean());
        }
    }
}

From source file:com.mapr.synth.SchemaSamplerTest.java

License:Apache License

/**
 * Draws 10000 samples from the schema in {@code schema005.json} and checks that every element
 * of field {@code d} reads via {@code asInt()} as a value in [3, 9), that the mean size of
 * field {@code c} is within 1 of 5, and that the mean size of field {@code d} is within 2 of 10.
 */
@Test
public void testSequence() throws IOException {
    String schema = Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read();
    SchemaSampler sampler = new SchemaSampler(schema);
    OnlineSummarizer cSizes = new OnlineSummarizer();
    OnlineSummarizer dSizes = new OnlineSummarizer();
    for (int trial = 0; trial < 10000; trial++) {
        JsonNode record = sampler.sample();
        cSizes.add(Iterables.size(record.get("c")));
        dSizes.add(Iterables.size(record.get("d")));

        // Each individual element of "d" must fall inside the expected range.
        for (JsonNode element : record.get("d")) {
            int value = element.asInt();
            assertTrue(value >= 3 && value < 9);
        }
    }

    assertEquals(5, cSizes.getMean(), 1);
    assertEquals(10, dSizes.getMean(), 2);
}

From source file:org.conan.mymahout.clustering.streaming.tools.ClusterQualitySummarizer.java

License:Apache License

/**
 * Prints per-cluster distance statistics to standard output and, when {@code fileOut} is not
 * null, also writes one comma-separated row per reported cluster to it.
 *
 * <p>Only summarizers with a count greater than 1 are reported; for the others a notice is
 * printed instead and nothing is written to {@code fileOut}. A final line reports the number of
 * summarizers and the largest {@code getMax()} seen among the reported ones.
 *
 * @param summarizers one summarizer per cluster; the position in the list is used as cluster id
 * @param type        label written as the last column of every row sent to {@code fileOut}
 * @param fileOut     optional destination for the comma-separated rows; may be null
 */
public static void printSummaries(List<OnlineSummarizer> summarizers, String type, PrintWriter fileOut) {
    double maxDistance = 0;
    int cluster = 0;
    for (OnlineSummarizer stats : summarizers) {
        if (stats.getCount() <= 1) {
            // Too few points: report that instead of statistics (message text kept verbatim).
            System.out.printf("Cluster %d is has %d data point. Need atleast 2 data points in a cluster for"
                    + " OnlineSummarizer.\n", cluster, stats.getCount());
        } else {
            maxDistance = Math.max(maxDistance, stats.getMax());
            System.out.printf("Average distance in cluster %d [%d]: %f\n", cluster, stats.getCount(),
                    stats.getMean());
            if (fileOut != null) {
                // Row layout: cluster, mean, sd, quartiles 0 through 4, count, type.
                fileOut.printf("%d,%f,%f,%f,%f,%f,%f,%f,%d,%s\n", cluster, stats.getMean(), stats.getSD(),
                        stats.getQuartile(0), stats.getQuartile(1), stats.getQuartile(2),
                        stats.getQuartile(3), stats.getQuartile(4), stats.getCount(), type);
            }
        }
        cluster++;
    }
    System.out.printf("Num clusters: %d; maxDistance: %f\n", summarizers.size(), maxDistance);
}