Example usage for org.apache.mahout.common.commandline DefaultOptionCreator maxIterationsOption

List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator maxIterationsOption

Introduction

In this page you can find the example usage for org.apache.mahout.common.commandline DefaultOptionCreator maxIterationsOption.

Prototype

public static DefaultOptionBuilder maxIterationsOption() 

Source Link

Document

Returns a default command line option for specification of max number of iterations.

Usage

From source file:chapter5.KMeanSample.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();/*from   www  . ja v a  2s  .co  m*/
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, String> argMap = parseArguments(args);
    if (argMap == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), input, output, measure, k, convergenceDelta, maxIterations);
    } else {
        double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations);
    }
    return 0;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();/*w  w  w  .j  av a2 s  . c  om*/
    addOutputOption();
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION, "cd", "The convergence delta value", "0");
    addOption(DefaultOptionCreator.overwriteOption().create());

    addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
    addOption(NUM_TERMS, "nt", "Vocabulary size", false);
    addOption(DOC_TOPIC_SMOOTHING, "a", "Smoothing for document/topic distribution", "0.0001");
    addOption(TERM_TOPIC_SMOOTHING, "e", "Smoothing for topic/term distribution", "0.0001");
    addOption(DICTIONARY, "dict", "Path to term-dictionary file(s) (glob expression supported)", false);
    addOption(DOC_TOPIC_OUTPUT, "dt", "Output path for the training doc/topic distribution", false);
    addOption(MODEL_TEMP_DIR, "mt", "Path to intermediate model path (useful for restarting)", false);
    addOption(ITERATION_BLOCK_SIZE, "block", "Number of iterations per perplexity check", "10");
    addOption(RANDOM_SEED, "seed", "Random seed", false);
    addOption(TEST_SET_FRACTION, "tf", "Fraction of data to hold out for testing", "0");
    addOption(NUM_TRAIN_THREADS, "ntt", "number of threads per mapper to train with", "4");
    addOption(NUM_UPDATE_THREADS, "nut", "number of threads per mapper to update the model with", "1");
    addOption(MAX_ITERATIONS_PER_DOC, "mipd", "max number of iterations per doc for p(topic|doc) learning",
            "10");
    addOption(NUM_REDUCE_TASKS, null, "number of reducers to use during model estimation", "10");
    addOption(buildOption(BACKFILL_PERPLEXITY, null, "enable backfilling of missing perplexity values", false,
            false, null));

    if (parseArguments(args) == null) {
        return -1;
    }

    int numTopics = Integer.parseInt(getOption(NUM_TOPICS));
    Path inputPath = getInputPath();
    Path topicModelOutputPath = getOutputPath();
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    int iterationBlockSize = Integer.parseInt(getOption(ITERATION_BLOCK_SIZE));
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    double alpha = Double.parseDouble(getOption(DOC_TOPIC_SMOOTHING));
    double eta = Double.parseDouble(getOption(TERM_TOPIC_SMOOTHING));
    int numTrainThreads = Integer.parseInt(getOption(NUM_TRAIN_THREADS));
    int numUpdateThreads = Integer.parseInt(getOption(NUM_UPDATE_THREADS));
    int maxItersPerDoc = Integer.parseInt(getOption(MAX_ITERATIONS_PER_DOC));
    Path dictionaryPath = hasOption(DICTIONARY) ? new Path(getOption(DICTIONARY)) : null;
    int numTerms = hasOption(NUM_TERMS) ? Integer.parseInt(getOption(NUM_TERMS))
            : getNumTerms(getConf(), dictionaryPath);
    Path docTopicOutputPath = hasOption(DOC_TOPIC_OUTPUT) ? new Path(getOption(DOC_TOPIC_OUTPUT)) : null;
    Path modelTempPath = hasOption(MODEL_TEMP_DIR) ? new Path(getOption(MODEL_TEMP_DIR))
            : getTempPath("topicModelState");
    long seed = hasOption(RANDOM_SEED) ? Long.parseLong(getOption(RANDOM_SEED)) : System.nanoTime() % 10000;
    float testFraction = hasOption(TEST_SET_FRACTION) ? Float.parseFloat(getOption(TEST_SET_FRACTION)) : 0.0f;
    int numReduceTasks = Integer.parseInt(getOption(NUM_REDUCE_TASKS));
    boolean backfillPerplexity = hasOption(BACKFILL_PERPLEXITY);

    return run(getConf(), inputPath, topicModelOutputPath, numTopics, numTerms, alpha, eta, maxIterations,
            iterationBlockSize, convergenceDelta, dictionaryPath, docTopicOutputPath, modelTempPath, seed,
            testFraction, numTrainThreads, numUpdateThreads, maxItersPerDoc, numReduceTasks,
            backfillPerplexity);
}

From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();//  w ww  . ja v a 2 s.  c  om
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> argMap = parseArguments(args);
    if (argMap == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), input, output, measure, k, convergenceDelta, maxIterations);
    } else {
        double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations);
    }
    return 0;
}

From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();/*from   ww w. ja  v  a  2s. c  o  m*/
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create());
    addOption(DefaultOptionCreator.numClustersOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.clusteringOption().create());
    addOption(DefaultOptionCreator.methodOption().create());

    if (parseArguments(args) == null) {
        return -1;
    }

    Path input = getInputPath();
    Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);

    Configuration conf = getConf();
    // clustersIn is used as host file
    MemCachedUtil.configHelper(conf, clusters.toUri().getPath());
    int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    MemKMeansUtil.kmeansConfigHelper(conf, k);

    // create the seeds
    log.info("Create seeds.");
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        MemRandomSeedGenerator.buildRandom(getConf(), input,
                Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
    }
    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
            .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    if (getConf() == null) {
        setConf(new Configuration());
    }

    // run iteration
    run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
            runSequential);
    return 0;
}

From source file:org.conan.mymahout.clustering.syntheticcontrol.fuzzykmeans.Job.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();/*  w w w .ja  v  a  2 s.c om*/
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);

    Map<String, List<String>> argMap = parseArguments(args);
    if (argMap == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    float fuzziness = Float.parseFloat(getOption(M_OPTION));

    addOption(new DefaultOptionBuilder().withLongName(M_OPTION).withRequired(true)
            .withArgument(new ArgumentBuilder().withName(M_OPTION).withMinimum(1).withMaximum(1).create())
            .withDescription("coefficient normalization factor, must be greater than 1").withShortName(M_OPTION)
            .create());
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
    double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
    run(getConf(), input, output, measure, t1, t2, maxIterations, fuzziness, convergenceDelta);
    return 0;
}

From source file:org.swjtu.helloworldcn.APCDriver.java

License:Apache License

public int run(String[] arg0) throws Exception {
    // TODO Auto-generated method stub
    Configuration conf = getConf();
    addInputOption();//from  w  w  w  . ja  v a2s.  c o  m
    addOutputOption();
    addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
    //addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption("inputdatatype", "tp", "Input data type,0 indicate original data; 1 indicate similary data", "0");

    addOption("dampfact", "lam",
            "update equation damping level in [0.5, 1).  Higher  values correspond to heavy damping, which may be needed if oscillations occur. (default: 0.9)",
            "0.9");
    addOption("convits", "cv",
            "if the estimated exemplars stay fixed for convits iterations, APCLUSTER terminates early (default: 100)",
            "100");
    // preference
    addOption("preference", "pf", "indicates the preference that data point i be chosen as an exemplar", "100");
    addOption("nonoise", "nn",
            "APCLUSTER adds a small amount of noise to similary data to prevent degenerate cases:(default: 0 enabled;1 disabled)",
            "0");
    Map<String, String> parsedArgs = parseArguments(arg0);
    if (parsedArgs == null) {
        return 0;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    // if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(conf, output);
    // }
    numDims = Integer.parseInt(parsedArgs.get("--dimensions"));
    /*String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass,DistanceMeasure.class);*/

    maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    System.out.println("convits" + parsedArgs.get("--convits"));
    convits = Integer.parseInt(parsedArgs.get("--convits"));
    lamda = Double.parseDouble(parsedArgs.get("--dampfact"));
    nonoise = Integer.parseInt(parsedArgs.get("--nonoise"));
    double preference = Double.parseDouble(parsedArgs.get("--preference"));
    int inputDataType = Integer.parseInt(parsedArgs.get("--inputdatatype"));
    e = new ArrayList<RandomAccessSparseVector>(convits);
    run(conf, input, output, numDims, preference, inputDataType, lamda, maxIterations);

    return 0;
}