List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator numClustersOption
public static DefaultOptionBuilder numClustersOption()
From source file:chapter5.KMeanSample.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from w w w .java 2 s . c om*/ addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.numClustersOption().create()); addOption(DefaultOptionCreator.t1Option().create()); addOption(DefaultOptionCreator.t2Option().create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); Map<String, String> argMap = parseArguments(args); if (argMap == null) { return -1; } Path input = getInputPath(); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) { measureClass = SquaredEuclideanDistanceMeasure.class.getName(); } double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION)); int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION)); if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), output); } DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class); if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) { int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)); run(getConf(), input, output, measure, k, convergenceDelta, maxIterations); } else { double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION)); double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION)); run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations); } return 0; }
From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from w w w.j a v a 2 s .co m*/ addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.numClustersOption().create()); addOption(DefaultOptionCreator.t1Option().create()); addOption(DefaultOptionCreator.t2Option().create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); Map<String, List<String>> argMap = parseArguments(args); if (argMap == null) { return -1; } Path input = getInputPath(); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) { measureClass = SquaredEuclideanDistanceMeasure.class.getName(); } double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION)); int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION)); if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), output); } DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class); if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) { int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)); run(getConf(), input, output, measure, k, convergenceDelta, maxIterations); } else { double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION)); double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION)); run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations); } return 0; }
From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from www . jav a 2s.c o m*/ addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription( "The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. " + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first") .create()); addOption(DefaultOptionCreator.numClustersOption() .withDescription( "The k in k-Means. If specified, then a random selection of k Vectors will be chosen" + " as the Centroid and written to the clusters input path.") .create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); addOption(DefaultOptionCreator.clusteringOption().create()); addOption(DefaultOptionCreator.methodOption().create()); if (parseArguments(args) == null) { return -1; } Path input = getInputPath(); Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) { measureClass = SquaredEuclideanDistanceMeasure.class.getName(); } double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION)); int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION)); if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), output); } DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class); Configuration conf = getConf(); // clustersIn is used as host file MemCachedUtil.configHelper(conf, clusters.toUri().getPath()); int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)); MemKMeansUtil.kmeansConfigHelper(conf, k); // create the seeds log.info("Create seeds."); if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) { MemRandomSeedGenerator.buildRandom(getConf(), input, Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure); } boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION); boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION) .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD); if (getConf() == null) { setConf(new Configuration()); } // run iteration run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering, runSequential); return 0; }