Example usage for org.apache.mahout.clustering.classify WeightedVectorWritable toString

Introduction

On this page you can find example usage of org.apache.mahout.clustering.classify.WeightedVectorWritable.toString().

Prototype

@Override
    public String toString()

Source Link

Usage

From source file: com.eniyitavsiye.mahoutx.hadoop.Job.java

License:Apache License

/**
 * Run the kmeans clustering job on an input dataset using the given the
 * number of clusters k and iteration parameters. All output data will be
 * written to the output directory, which will be initially deleted if it
 * exists. The clustered points will reside in the path
 * <output>/clustered-points. By default, the job expects a file containing
 * equal length space delimited data that resides in a directory named
 * "testdata", and writes output to a directory named "output".
 *
 * @param conf the Configuration to use/* w  w  w. java  2  s.  c o  m*/
 * @param input the String denoting the input directory path
 * @param output the String denoting the output directory path
 * @param measure the DistanceMeasure to use
 * @param k the number of clusters in Kmeans
 * @param convergenceDelta the double convergence criteria for iterations
 * @param maxIterations the int maximum number of iterations
 */
public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, int k,
        double convergenceDelta, int maxIterations) throws Exception {
    Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
    log.info("Preparing Input");
    InputDriver.runJob(input, directoryContainingConvertedInput,
            "org.apache.mahout.math.RandomAccessSparseVector");
    log.info("Running random seed to get initial clusters");
    Path clusters = new Path(output, "random-seeds");
    clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
    System.out.println("****************************************************************************");

    log.info("Running KMeans with k = {}", k);
    KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
            maxIterations, true, 0.0, false);
    // run ClusterDumper
    Path outGlob = new Path(output, "clusters-*-final");
    Path clusteredPoints = new Path(output, "clusteredPoints");
    log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
    ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
    clusterDumper.printClusters(null);

    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
            new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
    IntWritable key = new IntWritable();
    WeightedVectorWritable value = new WeightedVectorWritable();
    while (reader.next(key, value)) {
        System.out.println(value.toString() + " belongs to cluster " + key.toString());
    }
    reader.close();
}