Example usage for the org.apache.mahout.math.stats.OnlineSummarizer constructor OnlineSummarizer()

List of usage examples for the org.apache.mahout.math.stats.OnlineSummarizer constructor OnlineSummarizer()

Introduction

On this page you can find example usages of the org.apache.mahout.math.stats.OnlineSummarizer constructor OnlineSummarizer().

Prototype

OnlineSummarizer

Source Link

Usage

From source file:SimpleCsvExamples.java

License:Apache License

/**
 * Command-line driver with three modes, selected by {@code args[0]}:
 * <ul>
 *   <li>{@code --generate <n> <file>}: writes {@code n} lines produced by {@code Line.generate()}
 *       to {@code file} as UTF-8 text.</li>
 *   <li>{@code --parse <file>}: reads {@code file} line by line, builds a {@code Line} from each
 *       line, sums every field and encodes every field into a reused vector.</li>
 *   <li>{@code --fast <file>}: same accumulation as {@code --parse}, but reads records through
 *       {@code FastLineReader}.</li>
 * </ul>
 * The two reading modes print the grand total of all field values. Every invocation ends by
 * printing the elapsed wall-clock time in seconds. Any other value of {@code args[0]} only
 * prints the elapsed time.
 *
 * @param args mode flag followed by the mode's arguments as listed above; a missing argument
 *             results in an {@link ArrayIndexOutOfBoundsException}
 * @throws IOException if reading or writing the data file fails
 */
public static void main(String[] args) throws IOException {
    FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
    for (int i = 0; i < FIELDS; i++) {
        // Name each encoder after its own field index. The previous code concatenated the
        // literal 1, which gave every field the identical name "v1".
        encoder[i] = new ConstantValueEncoder("v" + i);
    }

    // Only consumed by the commented-out per-field statistics below; kept so that code can be
    // re-enabled without further changes.
    OnlineSummarizer[] s = new OnlineSummarizer[FIELDS];
    for (int i = 0; i < FIELDS; i++) {
        s[i] = new OnlineSummarizer();
    }
    long t0 = System.currentTimeMillis();
    // Scratch vector that is cleared and refilled for every input record.
    Vector v = new DenseVector(1000);
    if ("--generate".equals(args[0])) {
        PrintWriter out = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8));
        try {
            int n = Integer.parseInt(args[1]);
            for (int i = 0; i < n; i++) {
                Line x = Line.generate();
                out.println(x);
            }
        } finally {
            Closeables.close(out, false);
        }
    } else if ("--parse".equals(args[0])) {
        BufferedReader in = Files.newReader(new File(args[1]), Charsets.UTF_8);
        double total = 0;
        try {
            String line = in.readLine();
            while (line != null) {
                v.assign(0);
                Line x = new Line(line);
                for (int i = 0; i < FIELDS; i++) {
                    double z = x.getDouble(i);
                    total += z;
                    //s[i].add(x.getDouble(i));
                    encoder[i].addToVector(x.get(i), v);
                }
                line = in.readLine();
            }
        } finally {
            Closeables.close(in, true);
        }
        //      String separator = "";
        //      for (int i = 0; i < FIELDS; i++) {
        //        System.out.printf("%s%.3f", separator, s[i].getMean());
        //        separator = ",";
        //      }
        System.out.println("total: " + total);
    } else if ("--fast".equals(args[0])) {
        FastLineReader in = new FastLineReader(new FileInputStream(args[1]));
        double total = 0;
        try {
            FastLine line = in.read();
            while (line != null) {
                v.assign(0);
                for (int i = 0; i < FIELDS; i++) {
                    double z = line.getDouble(i);
                    total += z;
                    //s[i].add(z);
                    // No original bytes are passed here; the already-parsed value is used as the weight.
                    encoder[i].addToVector((byte[]) null, z, v);
                }
                line = in.read();
            }
        } finally {
            Closeables.close(in, true);
        }
        //      String separator = "";
        //      for (int i = 0; i < FIELDS; i++) {
        //        System.out.printf("%s%.3f", separator, s[i].getMean());
        //        separator = ",";
        //      }
        System.out.println("total: " + total);
    }
    System.out.printf("\nElapsed time = %.3f%n", (System.currentTimeMillis() - t0) / 1000.0);
}

From source file:com.mapr.stats.bandit.BanditTrainer.java

License:Apache License

/**
 * Computes average regret relative to perfect knowledge given uniform random probabilities. The
 * output contains the quartiles for different numbers of trials.  The quartiles are computed by
 * running many experiments for each specified number of trials.
 * <p/>/*from  w w w  .j a va 2  s  . co  m*/
 * This can be plotted pretty much directly in R
 * <pre>
 * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv')
 * > bxp(list(com.mapr.stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n))
 * </pre>
 *
 * @param outputFile   Where to put the output
 * @param sizes        The different size experiments to use
 * @param replications Number of times to repeat the experiment
 * @param bandits      How many bandits to simulate
 * @return Returns the average regret per trial
 * @throws java.io.FileNotFoundException If the output file can't be opened due to a missing directory.
 */
public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits)
        throws FileNotFoundException {

    try (PrintWriter out = new PrintWriter(outputFile)) {
        double finalMedianRegret = 0;
        Random gen = new Random();
        out.printf("n\tq0\tq1\tq2\tq3\tq4\n");
        // for each horizon time span of interest
        for (int n : sizes) {
            System.out.printf("%d\n", n);
            OnlineSummarizer summary = new OnlineSummarizer();
            // replicate the test many times
            for (int j = 0; j < replications; j++) {
                // pick probabilities at random

                double[] p = new double[bandits];
                for (int k = 0; k < bandits; k++) {
                    p[k] = gen.nextDouble();
                }

                // order them to make error interpretation easier
                Arrays.sort(p);
                BetaBayesModel s = new BetaBayesModel(bandits, RandomUtils.getRandom());
                int wins = 0;
                for (int i = 0; i < n; i++) {
                    int k = s.sample();
                    final double u = gen.nextDouble();
                    boolean r = u <= p[k];
                    wins += r ? 1 : 0;
                    s.train(k, r ? 1 : 0);
                }
                summary.add((double) wins / n - p[bandits - 1]);
            }
            out.printf("%d\t", n);
            for (int quartile = 0; quartile <= 4; quartile++) {
                out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n");
            }
            out.flush();
            finalMedianRegret = summary.getMedian();

            //      System.out.printf("%.3f\n", summary.getMean());
        }
        return finalMedianRegret;
    }
}

From source file:com.mapr.stats.bandit.BanditTrainer.java

License:Apache License

/**
 * Runs repeated bandit experiments and summarizes two regret series across all replications:
 * the cumulative regret sampled at a growing step interval, and the per-trial regret grouped
 * into buckets of {@code BUCKET_SIZE} consecutive steps. Both series are handed to
 * {@code printRegret} together with their step labels.
 * <p>
 * Regret for one trial is the mean of the last reference distribution after sorting minus the
 * mean of the chosen one (NOTE(review): this assumes {@code DistributionWithMean} sorts
 * ascending by mean, so the last element is the best bandit; confirm).
 * <p>
 * The resulting files are presumably meant to be plotted in R along these lines (confirm the
 * column layout against {@code printRegret}):
 * <pre>
 * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv')
 * > bxp(list(stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n))
 * </pre>
 *
 * @param cumulativeOutput Where to write the cumulative regret results
 * @param perTurnOutput    Where to write the per step regret results
 * @param replications     How many times to replicate the experiment
 * @param bandits          How many bandits to emulate
 * @param maxSteps         Maximum number of trials to run per experiment
 * @param modelFactory     How to construct the solver.
 * @param refSampler       How to get reward distributions for bandits
 * @return The median, across replications, of the cumulative regret at the last sampled step.
 *         No cumulative sample exists when {@code replications} or {@code maxSteps} is zero, in
 *         which case the final lookup fails with an {@link IndexOutOfBoundsException}.
 * @throws java.io.FileNotFoundException If the output file can't be opened due to
 *                                       a missing directory.
 */
public static double totalRegret(String cumulativeOutput, String perTurnOutput, int replications, int bandits,
        int maxSteps, BanditFactory modelFactory, DistributionGenerator refSampler)
        throws FileNotFoundException {
    List<OnlineSummarizer> cumulativeRegret = Lists.newArrayList();
    List<OnlineSummarizer> localRegret = Lists.newArrayList();
    List<Integer> steps = Lists.newArrayList();
    List<Integer> localSteps = Lists.newArrayList();

    Random gen = new Random();

    // repeat the whole experiment; the summarizers above accumulate across replications
    for (int j = 0; j < replications; j++) {
        BayesianBandit s = modelFactory.createBandit(bandits, gen);

        List<DistributionWithMean> refs = Lists.newArrayList();
        for (int k = 0; k < bandits; k++) {
            refs.add(refSampler.nextDistribution());
        }

        Collections.sort(refs);

        // index of the next cumulative-regret sample within this replication
        int k = 0;
        // current spacing between cumulative-regret samples
        int delta = 1;
        double totalRegret = 0;
        for (int i = 0; i < maxSteps; i++) {
            // widen the sampling interval as the run gets longer
            if (i > 50 * delta) {
                delta = bump(delta);
            }
            int choice = s.sample();
            double r = refs.get(choice).nextDouble();

            // computed once and used for both the cumulative and the per-bucket series
            double thisTrialRegret = refs.get(bandits - 1).getMean() - refs.get(choice).getMean();
            totalRegret += thisTrialRegret;
            if ((i + 1) % delta == 0) {
                // the first replication creates the summarizer and its step label
                if (cumulativeRegret.size() <= k) {
                    cumulativeRegret.add(new OnlineSummarizer());
                    steps.add(i + 1);
                }
                cumulativeRegret.get(k).add(totalRegret);
                k++;
            }
            if (localRegret.size() <= i / BUCKET_SIZE) {
                localRegret.add(new OnlineSummarizer());
                localSteps.add(i);
            }
            localRegret.get(i / BUCKET_SIZE).add(thisTrialRegret);
            s.train(choice, r);
        }
    }

    printRegret(cumulativeOutput, cumulativeRegret, steps);
    printRegret(perTurnOutput, localRegret, localSteps);
    return cumulativeRegret.get(cumulativeRegret.size() - 1).getMedian();
}

From source file:com.mapr.stats.bandit.EpsilonGreedy.java

License:Apache License

/**
 * Creates an epsilon-greedy model with one {@code OnlineSummarizer} per bandit. Every
 * summarizer starts out with a single observation of 1.
 *
 * @param bandits number of bandits to track
 * @param epsilon value stored in the {@code epsilon} field
 * @param gen     random number generator stored in the {@code gen} field
 */
public EpsilonGreedy(int bandits, double epsilon, Random gen) {
    this.epsilon = epsilon;
    this.gen = gen;
    this.summaries = Lists.newArrayList();
    for (int arm = 0; arm < bandits; arm++) {
        OnlineSummarizer summary = new OnlineSummarizer();
        // seed with one observation before registering the summarizer
        summary.add(1);
        this.summaries.add(summary);
    }
}

From source file:com.mapr.stats.BanditTrainer.java

License:Apache License

/**
 * Computes average regret relative to perfect knowledge given uniform random probabilities. The
 * output contains the quartiles for different numbers of trials.  The quartiles are computed by
 * running many experiments for each specified number of trials.
 * <p/>/*from  ww  w.  j  a va 2s . com*/
 * This can be plotted pretty much directly in R
 * <pre>
 * > x=read.delim(file='~/Apache/storm-aggregator/regret.tsv')
 * > bxp(list(stats=t(as.matrix(x[,2:6])), n=rep(1000,times=8),names=x$n))
 * </pre>
 *
 * @param outputFile   Where to put the output
 * @param sizes        The different size experiments to use
 * @param replications Number of times to repeat the experiment
 * @param bandits      How many bandits to simulate
 * @throws FileNotFoundException If the output file can't be opened due to a missing directory.
 * @return Returns the average regret per trial
 */
public static double averageRegret(String outputFile, int[] sizes, int replications, int bandits)
        throws FileNotFoundException {

    PrintWriter out = new PrintWriter(outputFile);
    try {
        double finalMedianRegret = 0;
        Random gen = new Random();
        out.printf("n\tq0\tq1\tq2\tq3\tq4\n");
        // for each horizon time span of interest
        for (int n : sizes) {
            System.out.printf("%d\n", n);
            OnlineSummarizer summary = new OnlineSummarizer();
            // replicate the test many times
            for (int j = 0; j < replications; j++) {
                // pick probabilities at random

                double[] p = new double[bandits];
                for (int k = 0; k < bandits; k++) {
                    p[k] = gen.nextDouble();
                }

                // order them to make error interpretation easier
                Arrays.sort(p);
                BetaBayesModel s = new BetaBayesModel(bandits, new MersenneTwisterRNG());
                int wins = 0;
                for (int i = 0; i < n; i++) {
                    int k = s.sample();
                    final double u = gen.nextDouble();
                    boolean r = u <= p[k];
                    wins += r ? 1 : 0;
                    s.train(k, r ? 1 : 0);
                }
                summary.add((double) wins / n - p[bandits - 1]);
            }
            out.printf("%d\t", n);
            for (int quartile = 0; quartile <= 4; quartile++) {
                out.printf("%.3f%s", summary.getQuartile(quartile), quartile < 4 ? "\t" : "\n");
            }
            out.flush();
            finalMedianRegret = summary.getMedian();

            //      System.out.printf("%.3f\n", summary.getMean());
        }
        return finalMedianRegret;
    } finally {
        out.close();
    }
}

From source file:com.mapr.synth.SchemaSamplerTest.java

License:Apache License

/**
 * Draws 10000 samples from the schema in {@code schema005.json} and checks that every element
 * of field "d" lies in [3, 9), that the mean size of field "c" is within 1 of 5, and that the
 * mean size of field "d" is within 2 of 10.
 */
@Test
public void testSequence() throws IOException {
    String schema = Resources.asCharSource(Resources.getResource("schema005.json"), Charsets.UTF_8).read();
    SchemaSampler sampler = new SchemaSampler(schema);

    OnlineSummarizer sizeOfC = new OnlineSummarizer();
    OnlineSummarizer sizeOfD = new OnlineSummarizer();
    for (int trial = 0; trial < 10000; trial++) {
        JsonNode sample = sampler.sample();
        sizeOfC.add(Iterables.size(sample.get("c")));

        JsonNode d = sample.get("d");
        sizeOfD.add(Iterables.size(d));
        for (JsonNode element : d) {
            int value = element.asInt();
            assertTrue(3 <= value && value < 9);
        }
    }

    assertEquals(5, sizeOfC.getMean(), 1);
    assertEquals(10, sizeOfD.getMean(), 2);
}