Example usage for org.apache.mahout.clustering ModelDistribution sampleFromPrior

List of usage examples for org.apache.mahout.clustering ModelDistribution sampleFromPrior

Introduction

On this page you can find example usage for org.apache.mahout.clustering.ModelDistribution.sampleFromPrior.

Prototype

Model<O>[] sampleFromPrior(int howMany);

Source Link

Document

Returns an array of howMany models sampled from the prior.

Usage

From source file: io.github.thushear.display.DisplayDirichlet.java

License:Apache License

/**
 * Runs Dirichlet clustering sequentially through {@link ClusterIterator} and records the
 * significant clusters of every iteration in {@code CLUSTERS}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>sample {@code numClusters} models from {@code modelDist} and wrap them in a prior
 *       {@link ClusterClassifier};</li>
 *   <li>write that prior to {@code output/clusters-0};</li>
 *   <li>iterate a {@link DirichletClusteringPolicy} over the {@code samples} path;</li>
 *   <li>for each iteration {@code 1..numIterations}, read {@code output/classifier-<i>} and
 *       append the list of models accepted by {@code isSignificant} to {@code CLUSTERS}.</li>
 * </ol>
 *
 * @param modelDist     distribution the prior models are sampled from; every sampled model is
 *                      cast to {@link Cluster}
 * @param numClusters   number of models to sample, also passed to the clustering policy
 * @param numIterations number of iterations to run and to read back
 * @throws IOException declared by this method; presumably raised by the classifier
 *                     read/write helpers or the iterator
 */
private static void runSequentialDirichletClassifier(ModelDistribution<VectorWritable> modelDist,
        int numClusters, int numIterations) throws IOException {
    // Build the prior classifier from freshly sampled models.
    List<Cluster> priorModels = new ArrayList<Cluster>();
    for (Model<VectorWritable> sampled : modelDist.sampleFromPrior(numClusters)) {
        priorModels.add((Cluster) sampled);
    }
    ClusterClassifier priorClassifierModel = new ClusterClassifier(priorModels);

    // Relative paths: input samples, output directory and the persisted prior.
    Path samplesPath = new Path("samples");
    Path outputPath = new Path("output");
    Path priorPath = new Path(outputPath, "clusters-0");
    Configuration configuration = new Configuration();
    writeClassifier(priorClassifierModel, configuration, priorPath);

    ClusteringPolicy clusteringPolicy = new DirichletClusteringPolicy(numClusters, numIterations);
    ClusterIterator iterator = new ClusterIterator(clusteringPolicy);
    iterator.iterate(samplesPath, priorPath, outputPath, numIterations);

    // NOTE(review): the prior is stored as "clusters-0" while posteriors are read from
    // "classifier-<i>"; confirm this matches the names ClusterIterator.iterate writes.
    for (int iteration = 1; iteration <= numIterations; iteration++) {
        Path posteriorPath = new Path(outputPath, "classifier-" + iteration);
        ClusterClassifier posterior = readClassifier(configuration, posteriorPath);

        List<Cluster> significant = new ArrayList<Cluster>();
        for (Cluster candidate : posterior.getModels()) {
            if (!isSignificant(candidate)) {
                continue;
            }
            significant.add(candidate);
        }
        CLUSTERS.add(significant);
    }
}