Example usage for org.apache.mahout.common.commandline DefaultOptionCreator clusteringOption

List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator clusteringOption

Introduction

In this page you can find the example usage for org.apache.mahout.common.commandline DefaultOptionCreator clusteringOption.

Prototype

public static DefaultOptionBuilder clusteringOption() 

Source Link

Document

Returns a default command line option for clustering specification.

Usage

From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();/* w w  w.  j  a  v  a  2s . c  o  m*/
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create());
    addOption(DefaultOptionCreator.numClustersOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.clusteringOption().create());
    addOption(DefaultOptionCreator.methodOption().create());

    if (parseArguments(args) == null) {
        return -1;
    }

    Path input = getInputPath();
    Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);

    Configuration conf = getConf();
    // clustersIn is used as host file
    MemCachedUtil.configHelper(conf, clusters.toUri().getPath());
    int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    MemKMeansUtil.kmeansConfigHelper(conf, k);

    // create the seeds
    log.info("Create seeds.");
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        MemRandomSeedGenerator.buildRandom(getConf(), input,
                Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
    }
    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
            .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    if (getConf() == null) {
        setConf(new Configuration());
    }

    // run iteration
    run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
            runSequential);
    return 0;
}