Example usage for org.apache.mahout.common.commandline DefaultOptionCreator MAX_ITERATIONS_OPTION

List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator MAX_ITERATIONS_OPTION

Introduction

On this page you can find example usages of org.apache.mahout.common.commandline.DefaultOptionCreator.MAX_ITERATIONS_OPTION.

Prototype

String MAX_ITERATIONS_OPTION

To view the source code for org.apache.mahout.common.commandline.DefaultOptionCreator.MAX_ITERATIONS_OPTION, click the Source Link.

Usage

From source file:chapter5.KMeanSample.java

License:Apache License

/**
 * Command-line entry point: registers the clustering options, parses {@code args}
 * and forwards the parsed values to one of two {@code run(...)} overloads.
 *
 * <p>When the number-of-clusters option is present, the overload taking {@code k} is
 * called; otherwise the overload taking the T1/T2 values is called.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, otherwise {@code 0}
 * @throws Exception propagated from option parsing, number parsing or the delegated run
 */
@Override
public int run(String[] args) throws Exception {
    // Options understood by this job.
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, String> parsed = parseArguments(args);
    if (parsed == null) {
        return -1;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    // Fall back to squared Euclidean distance when no measure class was supplied.
    String requestedMeasure = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    String measureClassName = requestedMeasure != null
            ? requestedMeasure
            : SquaredEuclideanDistanceMeasure.class.getName();

    double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), outputPath);
    }

    DistanceMeasure distance = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        int clusterCount = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), inputPath, outputPath, distance, clusterCount, delta, iterationLimit);
        return 0;
    }

    // No cluster count given: use the T1/T2 thresholds instead.
    double threshold1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
    double threshold2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
    run(getConf(), inputPath, outputPath, distance, threshold1, threshold2, delta, iterationLimit);
    return 0;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

/**
 * Command-line entry point: registers the job's options, parses {@code args}, converts
 * the option values to typed settings and delegates to the multi-argument
 * {@code run(...)} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}; otherwise the value
 *         returned by the delegated {@code run(...)} call
 * @throws Exception propagated from option parsing, number parsing or the delegated run
 */
@Override
public int run(String[] args) throws Exception {
    // Shared Mahout options.
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION, "cd", "The convergence delta value", "0");
    addOption(DefaultOptionCreator.overwriteOption().create());

    // Options specific to this driver.
    addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
    addOption(NUM_TERMS, "nt", "Vocabulary size", false);
    addOption(DOC_TOPIC_SMOOTHING, "a", "Smoothing for document/topic distribution", "0.0001");
    addOption(TERM_TOPIC_SMOOTHING, "e", "Smoothing for topic/term distribution", "0.0001");
    addOption(DICTIONARY, "dict", "Path to term-dictionary file(s) (glob expression supported)", false);
    addOption(DOC_TOPIC_OUTPUT, "dt", "Output path for the training doc/topic distribution", false);
    addOption(MODEL_TEMP_DIR, "mt", "Path to intermediate model path (useful for restarting)", false);
    addOption(ITERATION_BLOCK_SIZE, "block", "Number of iterations per perplexity check", "10");
    addOption(RANDOM_SEED, "seed", "Random seed", false);
    addOption(TEST_SET_FRACTION, "tf", "Fraction of data to hold out for testing", "0");
    addOption(NUM_TRAIN_THREADS, "ntt", "number of threads per mapper to train with", "4");
    addOption(NUM_UPDATE_THREADS, "nut", "number of threads per mapper to update the model with", "1");
    addOption(MAX_ITERATIONS_PER_DOC, "mipd", "max number of iterations per doc for p(topic|doc) learning",
            "10");
    addOption(NUM_REDUCE_TASKS, null, "number of reducers to use during model estimation", "10");
    addOption(buildOption(BACKFILL_PERPLEXITY, null, "enable backfilling of missing perplexity values", false,
            false, null));

    if (parseArguments(args) == null) {
        return -1;
    }

    int topicCount = Integer.parseInt(getOption(NUM_TOPICS));
    Path corpusPath = getInputPath();
    Path modelOutputPath = getOutputPath();
    int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    int blockSize = Integer.parseInt(getOption(ITERATION_BLOCK_SIZE));
    double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    double docTopicSmoothing = Double.parseDouble(getOption(DOC_TOPIC_SMOOTHING));
    double termTopicSmoothing = Double.parseDouble(getOption(TERM_TOPIC_SMOOTHING));
    int trainThreads = Integer.parseInt(getOption(NUM_TRAIN_THREADS));
    int updateThreads = Integer.parseInt(getOption(NUM_UPDATE_THREADS));
    int perDocIterationLimit = Integer.parseInt(getOption(MAX_ITERATIONS_PER_DOC));

    // The dictionary is optional; it stays null when the option was not supplied.
    Path dictionary = null;
    if (hasOption(DICTIONARY)) {
        dictionary = new Path(getOption(DICTIONARY));
    }

    // An explicit vocabulary size wins; otherwise it is obtained from getNumTerms.
    int vocabularySize;
    if (hasOption(NUM_TERMS)) {
        vocabularySize = Integer.parseInt(getOption(NUM_TERMS));
    } else {
        vocabularySize = getNumTerms(getConf(), dictionary);
    }

    Path docTopicPath = null;
    if (hasOption(DOC_TOPIC_OUTPUT)) {
        docTopicPath = new Path(getOption(DOC_TOPIC_OUTPUT));
    }

    Path tempModelPath;
    if (hasOption(MODEL_TEMP_DIR)) {
        tempModelPath = new Path(getOption(MODEL_TEMP_DIR));
    } else {
        tempModelPath = getTempPath("topicModelState");
    }

    // Without an explicit seed, one is derived from the nanosecond clock.
    long randomSeed;
    if (hasOption(RANDOM_SEED)) {
        randomSeed = Long.parseLong(getOption(RANDOM_SEED));
    } else {
        randomSeed = System.nanoTime() % 10000;
    }

    float heldOutFraction = 0.0f;
    if (hasOption(TEST_SET_FRACTION)) {
        heldOutFraction = Float.parseFloat(getOption(TEST_SET_FRACTION));
    }

    int reducerCount = Integer.parseInt(getOption(NUM_REDUCE_TASKS));
    boolean backfill = hasOption(BACKFILL_PERPLEXITY);

    return run(getConf(), corpusPath, modelOutputPath, topicCount, vocabularySize, docTopicSmoothing,
            termTopicSmoothing, iterationLimit, blockSize, delta, dictionary, docTopicPath, tempModelPath,
            randomSeed, heldOutFraction, trainThreads, updateThreads, perDocIterationLimit, reducerCount,
            backfill);
}

From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java

License:Apache License

/**
 * Command-line entry point for the clustering job.
 *
 * <p>Registers the supported options, parses {@code args}, optionally deletes the output
 * path, and then calls the {@code run(...)} overload that matches the supplied options:
 * the {@code k}-based overload when the number-of-clusters option is present, the
 * T1/T2-based overload otherwise.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, otherwise {@code 0}
 * @throws Exception propagated from option parsing, number parsing or the delegated run
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.numClustersOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedOptions = parseArguments(args);
    if (parsedOptions == null) {
        return -1;
    }

    Path source = getInputPath();
    Path target = getOutputPath();

    // Default to squared Euclidean distance when the measure option is absent.
    String measureName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureName == null) {
        measureName = SquaredEuclideanDistanceMeasure.class.getName();
    }

    double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int iterationCap = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), target);
    }

    DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureName, DistanceMeasure.class);

    if (!hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        // No explicit k: run with the T1/T2 thresholds.
        double t1Value = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double t2Value = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        run(getConf(), source, target, distanceMeasure, t1Value, t2Value, delta, iterationCap);
    } else {
        int clusterCount = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
        run(getConf(), source, target, distanceMeasure, clusterCount, delta, iterationCap);
    }
    return 0;
}

From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java

License:Apache License

/**
 * Command-line entry point for the memcached-backed k-means driver.
 *
 * <p>Registers the options, parses {@code args}, configures the job through
 * {@code MemCachedUtil} and {@code MemKMeansUtil}, builds random seeds, and finally
 * delegates to the multi-argument {@code run(...)} overload.
 *
 * <p>The number-of-clusters option is effectively mandatory: its value is parsed
 * unconditionally, so {@link Integer#parseInt(String)} fails when it is missing.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, otherwise {@code 0}
 * @throws Exception propagated from option parsing, number parsing, the helper
 *         utilities or the delegated run
 */
@Override
public int run(String[] args) throws Exception {
    // Make sure a Configuration exists BEFORE anything reads it. Previously this guard
    // ran only just before the final run(...) call, after the configuration had already
    // been handed to HadoopUtil, MemCachedUtil, MemKMeansUtil and the seed generator.
    if (getConf() == null) {
        setConf(new Configuration());
    }

    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create());
    addOption(DefaultOptionCreator.numClustersOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.clusteringOption().create());
    addOption(DefaultOptionCreator.methodOption().create());

    if (parseArguments(args) == null) {
        return -1;
    }

    Path input = getInputPath();
    Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        // Default distance measure when none was requested.
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);

    Configuration conf = getConf();
    // clustersIn is used as host file
    MemCachedUtil.configHelper(conf, clusters.toUri().getPath());
    int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    MemKMeansUtil.kmeansConfigHelper(conf, k);

    // create the seeds
    log.info("Create seeds.");
    if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
        // Reuse the value parsed above instead of parsing the same option a second time.
        MemRandomSeedGenerator.buildRandom(getConf(), input, k, measure);
    }
    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
            .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

    // run iteration
    run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
            runSequential);
    return 0;
}

From source file:org.conan.mymahout.clustering.syntheticcontrol.fuzzykmeans.Job.java

License:Apache License

/**
 * Command-line entry point for the fuzzy k-means job.
 *
 * <p>Registers the options (including the required {@code M_OPTION} fuzziness
 * coefficient), parses {@code args}, optionally deletes the output path, and delegates
 * to the multi-argument {@code run(...)} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, otherwise {@code 0}
 * @throws Exception propagated from option parsing, number parsing or the delegated run
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.distanceMeasureOption().create());
    addOption(DefaultOptionCreator.convergenceOption().create());
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.t1Option().create());
    addOption(DefaultOptionCreator.t2Option().create());
    // M_OPTION is registered exactly once, here, before parsing. The former second
    // registration (made after parseArguments had already run and the value had been
    // read) could not influence parsing and has been removed as dead code.
    addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);

    Map<String, List<String>> argMap = parseArguments(args);
    if (argMap == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
    if (measureClass == null) {
        // Default distance measure when none was requested.
        measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    }
    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    float fuzziness = Float.parseFloat(getOption(M_OPTION));

    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), output);
    }
    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
    double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
    double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
    run(getConf(), input, output, measure, t1, t2, maxIterations, fuzziness, convergenceDelta);
    return 0;
}

From source file:org.swjtu.helloworldcn.APCDriver.java

License:Apache License

/**
 * Command-line entry point for the affinity-propagation driver.
 *
 * <p>Registers the options, parses {@code arg0}, deletes the output path, stores the
 * parsed settings in this object's fields ({@code numDims}, {@code maxIterations},
 * {@code convits}, {@code lamda}, {@code nonoise}, {@code e}) and delegates to the
 * multi-argument {@code run(...)} overload.
 *
 * @param arg0 raw command-line arguments
 * @return always {@code 0}, including when {@code parseArguments} yields {@code null}
 * @throws Exception propagated from option parsing, number parsing or the delegated run
 */
public int run(String[] arg0) throws Exception {
    Configuration configuration = getConf();

    addInputOption();
    addOutputOption();
    addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
    addOption(DefaultOptionCreator.maxIterationsOption().create());
    addOption("inputdatatype", "tp", "Input data type,0 indicate original data; 1 indicate similary data", "0");

    addOption("dampfact", "lam",
            "update equation damping level in [0.5, 1).  Higher  values correspond to heavy damping, which may be needed if oscillations occur. (default: 0.9)",
            "0.9");
    addOption("convits", "cv",
            "if the estimated exemplars stay fixed for convits iterations, APCLUSTER terminates early (default: 100)",
            "100");
    addOption("preference", "pf", "indicates the preference that data point i be chosen as an exemplar", "100");
    addOption("nonoise", "nn",
            "APCLUSTER adds a small amount of noise to similary data to prevent degenerate cases:(default: 0 enabled;1 disabled)",
            "0");

    Map<String, String> cli = parseArguments(arg0);
    if (cli == null) {
        return 0;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    // No overwrite option is registered above; the output path is always deleted.
    HadoopUtil.delete(configuration, outputPath);

    numDims = Integer.parseInt(cli.get("--dimensions"));
    maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

    // Echo the raw convits value before parsing it.
    String convitsText = cli.get("--convits");
    System.out.println("convits" + convitsText);
    convits = Integer.parseInt(convitsText);

    lamda = Double.parseDouble(cli.get("--dampfact"));
    nonoise = Integer.parseInt(cli.get("--nonoise"));
    double exemplarPreference = Double.parseDouble(cli.get("--preference"));
    int dataType = Integer.parseInt(cli.get("--inputdatatype"));

    // List is created with an initial capacity of convits entries.
    e = new ArrayList<RandomAccessSparseVector>(convits);

    run(configuration, inputPath, outputPath, numDims, exemplarPreference, dataType, lamda, maxIterations);
    return 0;
}