List of usage examples for the constant `String CONVERGENCE_DELTA_OPTION` declared in org.apache.mahout.common.commandline.DefaultOptionCreator.
To view the source code for org.apache.mahout.common.commandline.DefaultOptionCreator.CONVERGENCE_DELTA_OPTION, click the source link below.
From source file:chapter5.KMeanSample.java
License:Apache License
/**
 * CLI entry point for this clustering sample: registers the standard Mahout
 * clustering options, parses the command line, and dispatches to one of the
 * two {@code run(...)} overloads — the k-based overload when {@code -k} is
 * supplied, otherwise the t1/t2-threshold overload.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 when argument parsing fails
 * @throws Exception if the underlying clustering job fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    Map<String, String> argMap = parseArguments(args);
    if (argMap == null) {
        // parseArguments reports the problem (or prints help); just signal failure
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        // no --distanceMeasure given: default to squared Euclidean distance
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // --overwrite: clear any previous run's output directory first
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        // explicit cluster count: use the k-based overload
        int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), input, output, measure, k, convergenceDelta, maxIterations);
    } else {
        // no k: use the t1/t2 distance-threshold overload instead
        double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations);
    }
    return 0;
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
/**
 * Configures and launches topic-model (CVB0) training. Registers all CLI
 * options, parses them, resolves each setting (falling back to the declared
 * defaults), and delegates to the long-form {@code run(...)} overload.
 *
 * @param args raw command-line arguments
 * @return -1 when argument parsing fails; otherwise the delegate's exit code
 * @throws Exception if the training job fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    // convergence delta registered directly (short name "cd"), default "0"
    addOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION, "cd", "The convergence delta value", "0");
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
    addOption(NUM_TERMS, "nt", "Vocabulary size", false);
    addOption(DOC_TOPIC_SMOOTHING, "a", "Smoothing for document/topic distribution", "0.0001");
    addOption(TERM_TOPIC_SMOOTHING, "e", "Smoothing for topic/term distribution", "0.0001");
    addOption(DICTIONARY, "dict", "Path to term-dictionary file(s) (glob expression supported)", false);
    addOption(DOC_TOPIC_OUTPUT, "dt", "Output path for the training doc/topic distribution", false);
    addOption(MODEL_TEMP_DIR, "mt", "Path to intermediate model path (useful for restarting)", false);
    addOption(ITERATION_BLOCK_SIZE, "block", "Number of iterations per perplexity check", "10");
    addOption(RANDOM_SEED, "seed", "Random seed", false);
    addOption(TEST_SET_FRACTION, "tf", "Fraction of data to hold out for testing", "0");
    addOption(NUM_TRAIN_THREADS, "ntt", "number of threads per mapper to train with", "4");
    addOption(NUM_UPDATE_THREADS, "nut", "number of threads per mapper to update the model with", "1");
    addOption(MAX_ITERATIONS_PER_DOC, "mipd", "max number of iterations per doc for p(topic|doc) learning", "10");
    addOption(NUM_REDUCE_TASKS, null, "number of reducers to use during model estimation", "10");
    // boolean flag: presence-only, no argument
    addOption(buildOption(BACKFILL_PERPLEXITY, null, "enable backfilling of missing perplexity values", false,
            false, null));
    if (parseArguments(args) == null) {
        // parse failure already reported; signal error to the caller
        return -1;
    }
    int numTopics = Integer.parseInt(getOption(NUM_TOPICS));
    Path inputPath = getInputPath();
    Path topicModelOutputPath = getOutputPath();
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    int iterationBlockSize = Integer.parseInt(getOption(ITERATION_BLOCK_SIZE));
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    double alpha = Double.parseDouble(getOption(DOC_TOPIC_SMOOTHING));
    double eta = Double.parseDouble(getOption(TERM_TOPIC_SMOOTHING));
    int numTrainThreads = Integer.parseInt(getOption(NUM_TRAIN_THREADS));
    int numUpdateThreads = Integer.parseInt(getOption(NUM_UPDATE_THREADS));
    int maxItersPerDoc = Integer.parseInt(getOption(MAX_ITERATIONS_PER_DOC));
    Path dictionaryPath = hasOption(DICTIONARY) ? new Path(getOption(DICTIONARY)) : null;
    // vocabulary size: taken from -nt when given, otherwise counted from the dictionary
    int numTerms = hasOption(NUM_TERMS) ? Integer.parseInt(getOption(NUM_TERMS))
            : getNumTerms(getConf(), dictionaryPath);
    Path docTopicOutputPath = hasOption(DOC_TOPIC_OUTPUT) ? new Path(getOption(DOC_TOPIC_OUTPUT)) : null;
    // intermediate model state goes to -mt when given, else to a temp subdirectory
    Path modelTempPath = hasOption(MODEL_TEMP_DIR) ? new Path(getOption(MODEL_TEMP_DIR))
            : getTempPath("topicModelState");
    // NOTE(review): without -seed the "seed" is nanoTime % 10000, so runs are not reproducible by default
    long seed = hasOption(RANDOM_SEED) ? Long.parseLong(getOption(RANDOM_SEED)) : System.nanoTime() % 10000;
    float testFraction = hasOption(TEST_SET_FRACTION) ? Float.parseFloat(getOption(TEST_SET_FRACTION)) : 0.0f;
    int numReduceTasks = Integer.parseInt(getOption(NUM_REDUCE_TASKS));
    boolean backfillPerplexity = hasOption(BACKFILL_PERPLEXITY);
    return run(getConf(), inputPath, topicModelOutputPath, numTopics, numTerms, alpha, eta, maxIterations,
            iterationBlockSize, convergenceDelta, dictionaryPath, docTopicOutputPath, modelTempPath, seed,
            testFraction, numTrainThreads, numUpdateThreads, maxItersPerDoc, numReduceTasks,
            backfillPerplexity);
}
From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java
License:Apache License
/**
 * CLI entry point: registers the standard clustering options, parses the
 * command line, then delegates either to the k-based {@code run(...)}
 * overload (when -k is present) or to the t1/t2-threshold overload.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 when argument parsing fails
 * @throws Exception if the underlying clustering job fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1; // parse failure / help request already handled upstream
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    // Distance measure: user-supplied class name, or squared Euclidean by default.
    String measureOption = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    String measureClassName = (measureOption == null)
            ? SquaredEuclideanDistanceMeasure.class.getName()
            : measureOption;

    double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

    // Honour --overwrite by wiping any earlier output before starting.
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), outputPath);
    }

    DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        // Explicit cluster count supplied: use the k-based overload.
        int numClusters = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), inputPath, outputPath, distanceMeasure, numClusters, delta, iterationLimit);
    } else {
        // No k: fall back to the t1/t2 distance-threshold overload.
        double threshold1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double threshold2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        run(getConf(), inputPath, outputPath, distanceMeasure, threshold1, threshold2, delta, iterationLimit);
    }
    return 0;
}
From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java
License:Apache License
/**
 * CLI entry point for memcached-backed k-means: registers options, wires the
 * clusters-in path into the memcached helper as a host file, seeds random
 * centroids when -k is given, then runs the iteration driver.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 when argument parsing fails
 * @throws Exception if seeding or the clustering run fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create());
    addOption(DefaultOptionCreator.numClustersOption()
            .withDescription(
                    "The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.clusteringOption().create());
    addOption(DefaultOptionCreator.methodOption().create());
    if (parseArguments(args) == null) {
        return -1;
    }
    Path input = getInputPath();
    Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        // no --distanceMeasure given: default to squared Euclidean distance
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    Configuration conf = getConf();
    // clustersIn is used as host file
    MemCachedUtil.configHelper(conf, clusters.toUri().getPath());
    // NOTE(review): k is parsed unconditionally here, so if -k is absent
    // getOption(...) returns null and parseInt throws NumberFormatException —
    // yet the hasOption guard below treats -k as optional. Confirm whether
    // -k is actually required for this driver.
    int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    MemKMeansUtil.kmeansConfigHelper(conf, k);
    // create the seeds
    log.info("Create seeds.");
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        // -k given: write k randomly chosen input vectors as initial centroids
        MemRandomSeedGenerator.buildRandom(getConf(), input,
                Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
    }
    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
            .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    // NOTE(review): this null check comes after getConf() has already been
    // used above (configHelper, buildRandom), so it likely never triggers —
    // verify the intended ordering.
    if (getConf() == null) {
        setConf(new Configuration());
    }
    // run iteration
    run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
            runSequential);
    return 0;
}
From source file:org.conan.mymahout.clustering.syntheticcontrol.fuzzykmeans.Job.java
License:Apache License
/**
 * CLI entry point for the fuzzy k-means job: registers the clustering options
 * (including the required fuzziness factor {@code m}), parses the command
 * line, and delegates to the t1/t2-seeded {@code run(...)} overload.
 *
 * <p>Fix: the original re-registered {@code M_OPTION} via a
 * {@code DefaultOptionBuilder} AFTER {@code parseArguments(args)} had already
 * run; an option added after parsing is never consulted, so that block was
 * dead code (duplicating the registration above) and has been removed.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 when argument parsing fails
 * @throws Exception if the underlying clustering job fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    // fuzziness factor m; required, must be > 1
    addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
    Map<String, List<String>> argMap = parseArguments(args);
    if (argMap == null) {
        // parse failure / help request already handled by parseArguments
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        // no --distanceMeasure given: default to squared Euclidean distance
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    float fuzziness = Float.parseFloat(getOption(M_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // --overwrite: clear any previous run's output directory first
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
    double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
    run(getConf(), input, output, measure, t1, t2, maxIterations, fuzziness, convergenceDelta);
    return 0;
}