List of usage examples for org.apache.commons.cli2 CommandLine getValue
Object getValue(final Option option) throws IllegalStateException;
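getValue returns the parsed argument of the given option as an Object; every example below follows the same build-parse-read pattern. Here is a minimal sketch of that pattern, assuming the same commons-cli2 classes the examples use (the "input" option name is illustrative, not taken from any one example):

  // org.apache.commons.cli2: CommandLine, Group, Option, OptionException
  // org.apache.commons.cli2.builder: ArgumentBuilder, DefaultOptionBuilder, GroupBuilder
  // org.apache.commons.cli2.commandline: Parser
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
      .withDescription("Input path").create();
  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).create();
  Parser parser = new Parser();
  parser.setGroup(group);
  try {
    CommandLine cmdLine = parser.parse(args);
    // getValue returns Object; for a single String argument, toString() (or a cast) yields the text
    String input = cmdLine.getValue(inputOpt).toString();
  } catch (OptionException e) {
    // parse failure: print usage, e.g. CommandLineUtil.printHelp(group)
  }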
From source file:org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver.java
public static void main(String[] args) throws Exception {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option clustersOpt = DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create();
  Option kOpt = DefaultOptionCreator.kOption()
      .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.")
      .create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option mOpt = DefaultOptionCreator.mOption().create();
  Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
  Option numMapTasksOpt = DefaultOptionCreator.numMappersOption().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
  Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
  Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
      .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
      .withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt).withOption(overwriteOutput)
      .withOption(helpOpt).withOption(numMapTasksOpt).withOption(numReduceTasksOpt)
      // include clusteringOpt so the hasOption(clusteringOpt) check below can match
      .withOption(clusteringOpt)
      .withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    if (cmdLine.hasOption(measureClassOpt)) {
      measureClass = cmdLine.getValue(measureClassOpt).toString();
    }
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
    int numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
    double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
    if (cmdLine.hasOption(kOpt)) {
      clusters = RandomSeedGenerator.buildRandom(input, clusters,
          Integer.parseInt(cmdLine.getValue(kOpt).toString()));
    }
    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
        numReduceTasks, m, cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
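As here, the returned Object is usually consumed by calling toString() on it; other examples below (ClusterQualitySummarizer, InMemoryCollapsedVariationalBayes0) cast it to String instead. For a single-valued textual argument the two approaches are equivalent.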
From source file:org.apache.mahout.clustering.kmeans.KMeansDriver.java
public static void main(String[] args) throws Exception {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option clustersOpt = DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create();
  Option kOpt = DefaultOptionCreator.kOption()
      .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.")
      .create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
      .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
      .withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
      .withOption(overwriteOutput).withOption(helpOpt).withOption(clusteringOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = cmdLine.getValue(measureClassOpt).toString();
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    if (cmdLine.hasOption(kOpt)) {
      clusters = RandomSeedGenerator.buildRandom(input, clusters,
          Integer.parseInt(cmdLine.getValue(kOpt).toString()));
    }
    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks,
        cmdLine.hasOption(clusteringOpt));
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java
public static int main2(String[] args, Configuration conf) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option helpOpt = DefaultOptionCreator.helpOption();
  Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
      .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
      .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
          + "one doc per line")
      .withShortName("i").create();
  Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
      .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
      .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();
  Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
      .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
      .withDescription("HDFS namenode URI").withShortName("dfs").create();
  Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
      .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of topics to learn").withShortName("top").create();
  Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
      .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
      .withDescription("File to write out p(term | topic)").withShortName("to").create();
  Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
      .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
      .withDescription("File to write out p(topic | docid)").withShortName("do").create();
  Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
      .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
      .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();
  Option etaOpt = obuilder.withLongName("eta").withRequired(false)
      .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
      .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();
  Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
      .withArgument(abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
      .withDescription("Maximum number of training passes").withShortName("m").create();
  Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
      .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
          .withDefault("0.0").create())
      .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
      .create();
  Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
      .withArgument(abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
      .withDescription("Minimum number of iterations").withShortName("b").create();
  Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
      .withArgument(abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
      .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c").create();
  Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
      .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no").create())
      .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt").create();
  Option numTrainThreadsOpt = obuilder.withLongName("numTrainThreads").withRequired(false)
      .withArgument(abuilder.withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
      .withDescription("number of threads to train with").withShortName("ntt").create();
  Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
      .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
      .withDescription("number of threads to update the model with").withShortName("nut").create();
  Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
      .withArgument(abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
      .withDescription("print verbose information, like top-terms in each topic, during iteration")
      .withShortName("v").create();

  Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
      .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
      .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
      .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
      .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
      .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    String inputDirString = (String) cmdLine.getValue(inputDirOpt);
    String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
    int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
    double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
    double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
    int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
    int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
    double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
    int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
    int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
    String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
    String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
    //String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
    boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
    double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

    long start = System.nanoTime();
    if (conf.get("fs.default.name") == null) {
      String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
      conf.set("fs.default.name", dfsNameNode);
    }
    String[] terms = loadDictionary(dictDirString, conf);
    logTime("dictionary loading", System.nanoTime() - start);
    start = System.nanoTime();
    Matrix corpus = loadVectors(inputDirString, conf);
    logTime("vector seqfile corpus loading", System.nanoTime() - start);
    start = System.nanoTime();
    InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
        numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
    logTime("cvb0 init", System.nanoTime() - start);

    start = System.nanoTime();
    cvb0.setVerbose(verbose);
    cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
    logTime("total training time", System.nanoTime() - start);

    /*
    if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
      cvb0.inferDocuments(0.0, 100, true);
    } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
      cvb0.inferDocuments(0.0, 100, false);
    }
    */

    start = System.nanoTime();
    cvb0.writeModel(new Path(topicOutFile));
    DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
    logTime("printTopics", System.nanoTime() - start);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
  }
  return 0;
}
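Note that this example reads defaulted options such as alphaOpt and etaOpt without a hasOption guard: because each argument was built with withDefault(...), getValue falls back to that default value when the option is absent from the command line.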
From source file:org.apache.mahout.clustering.lda.LDADriver.java
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option topicsOpt = DefaultOptionCreator.numTopicsOption().create();
  Option wordsOpt = DefaultOptionCreator.numWordsOption().create();
  Option topicSmOpt = DefaultOptionCreator.topicSmoothingOption().create();
  Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
  Option numReducOpt = DefaultOptionCreator.numReducersOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
      .withOption(topicsOpt).withOption(wordsOpt).withOption(topicSmOpt).withOption(maxIterOpt)
      .withOption(numReducOpt).withOption(overwriteOutput).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReducOpt).toString());
    int numTopics = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
    int numWords = Integer.parseInt(cmdLine.getValue(wordsOpt).toString());
    // the smoothing value comes from its own option, topicSmOpt
    double topicSmoothing = Double.parseDouble(cmdLine.getValue(topicSmOpt).toString());
    if (topicSmoothing < 1) {
      topicSmoothing = 50.0 / numTopics;
    }
    runJob(input, output, numTopics, numWords, topicSmoothing, maxIterations, numReduceTasks);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option dictOpt = obuilder.withLongName("dict").withRequired(true)
      .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create())
      .withDescription("Dictionary to read in, in the same format as one created by "
          + "org.apache.mahout.utils.vectors.lucene.Driver")
      .withShortName("d").create();
  Option outOpt = DefaultOptionCreator.outputOption().create();
  Option wordOpt = obuilder.withLongName("words").withRequired(false)
      .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create())
      .withDescription("Number of words to print").withShortName("w").create();
  Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false)
      .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create())
      .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  // include helpOpt in the group so the hasOption(helpOpt) check below can match
  Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
      .withOption(inputOpt).withOption(dictTypeOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    String input = cmdLine.getValue(inputOpt).toString();
    String dictFile = cmdLine.getValue(dictOpt).toString();
    int numWords = 20;
    if (cmdLine.hasOption(wordOpt)) {
      numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
    }
    Configuration config = new Configuration();
    String dictionaryType = "text";
    if (cmdLine.hasOption(dictTypeOpt)) {
      dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
    }
    List<String> wordList;
    if ("text".equals(dictionaryType)) {
      wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
    } else if ("sequencefile".equals(dictionaryType)) {
      wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
    } else {
      throw new IllegalArgumentException("Invalid dictionary format");
    }
    List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);
    File output = null;
    if (cmdLine.hasOption(outOpt)) {
      output = new File(cmdLine.getValue(outOpt).toString());
      if (!output.exists() && !output.mkdirs()) {
        throw new IOException("Could not create directory: " + output);
      }
    }
    printTopWords(topWords, output);
  } catch (OptionException e) {
    CommandLineUtil.printHelp(group);
    throw e;
  }
}
From source file:org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver.java
public static void main(String[] args) throws IOException {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();
  Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option inputIsCanopiesOpt = DefaultOptionCreator.inputIsCanopiesOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option threshold1Opt = DefaultOptionCreator.t1Option().create();
  Option threshold2Opt = DefaultOptionCreator.t2Option().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
      .withOption(overwriteOutput).withOption(measureClassOpt).withOption(helpOpt)
      .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
      .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = cmdLine.getValue(measureClassOpt).toString();
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
    double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
    runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations,
        cmdLine.hasOption(inputIsCanopiesOpt), cmdLine.hasOption(clusteringOpt));
  } catch (OptionException e) {
    log.error("Exception parsing command line: ", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer.java
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();
  Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (training set)").create();
  Option testInputFileOption = builder.withLongName("testInput").withShortName("itest")
      .withArgument(argumentBuilder.withName("testInput").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (test set)").create();
  Option centroidsFileOption = builder.withLongName("centroids").withShortName("c").withRequired(true)
      .withArgument(argumentBuilder.withName("centroids").withMaximum(1).create())
      .withDescription("where to get seq files with the centroids (from Mahout KMeans or StreamingKMeansDriver)")
      .create();
  Option centroidsCompareFileOption = builder.withLongName("centroidsCompare").withShortName("cc")
      .withRequired(false)
      .withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
      .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
          + "StreamingKMeansDriver)")
      .create();
  Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("where to dump the CSV file with the results").create();
  Option mahoutKMeansFormatOption = builder.withLongName("mahoutkmeansformat").withShortName("mkm")
      .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();
  Option mahoutKMeansCompareFormatOption = builder.withLongName("mahoutkmeansformatCompare")
      .withShortName("mkmc").withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
      .withOption(testInputFileOption).withOption(outputFileOption).withOption(centroidsFileOption)
      .withOption(centroidsCompareFileOption).withOption(mahoutKMeansFormatOption)
      .withOption(mahoutKMeansCompareFormatOption).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 150));

  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }
  trainFile = (String) cmdLine.getValue(inputFileOption);
  if (cmdLine.hasOption(testInputFileOption)) {
    testFile = (String) cmdLine.getValue(testInputFileOption);
  }
  centroidFile = (String) cmdLine.getValue(centroidsFileOption);
  if (cmdLine.hasOption(centroidsCompareFileOption)) {
    centroidCompareFile = (String) cmdLine.getValue(centroidsCompareFileOption);
  }
  outputFile = (String) cmdLine.getValue(outputFileOption);
  if (cmdLine.hasOption(mahoutKMeansFormatOption)) {
    mahoutKMeansFormat = true;
  }
  if (cmdLine.hasOption(mahoutKMeansCompareFormatOption)) {
    mahoutKMeansFormatCompare = true;
  }
  return true;
}
From source file:org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles.java
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();
  Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription("the base folder for the sequence files (they must all have the same key/value type)")
      .create();
  Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("the base name for the output splits; the i'th split gets the suffix -i")
      .create();
  Option numSplitsOption = builder.withLongName("numSplits").withShortName("ns").withRequired(true)
      .withArgument(argumentBuilder.withName("numSplits").withMaximum(1).create())
      .withDescription("how many splits to use for the given files").create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
      .withOption(outputFileOption).withOption(numSplitsOption).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }
  inputFile = (String) cmdLine.getValue(inputFileOption);
  outputFileBase = (String) cmdLine.getValue(outputFileOption);
  numSplits = Integer.parseInt((String) cmdLine.getValue(numSplitsOption));
  return true;
}
From source file:org.apache.mahout.df.BreimanExample.java
@Override
public int run(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of trees to grow, each iteration").create();
  Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
      .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of times to repeat the test").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
      .withOption(nbtreesOpt).withOption(helpOpt).create();

  Path dataPath;
  Path datasetPath;
  int nbTrees;
  int nbIterations;

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }
    String dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
    nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());
    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  // load the data
  FileSystem fs = dataPath.getFileSystem(new Configuration());
  Dataset dataset = Dataset.load(getConf(), datasetPath);
  Data data = DataLoader.loadData(dataset, fs, dataPath);

  // take m to be the first integer less than log2(M) + 1, where M is the
  // number of inputs
  int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);

  Random rng = RandomUtils.getRandom();
  for (int iteration = 0; iteration < nbIterations; iteration++) {
    log.info("Iteration {}", iteration);
    runIteration(rng, data, m, nbTrees);
  }

  log.info("********************************************");
  log.info("Selection error : {}", sumTestErr / nbIterations);
  log.info("Single Input error : {}", sumOneErr / nbIterations);
  log.info("One Tree error : {}", sumTreeErr / nbIterations);
  log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
  log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
  log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
  log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

  return 0;
}
From source file:org.apache.mahout.df.mapred.BuildForest.java
@Override
public int run(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option oobOpt = obuilder.withShortName("oob").withRequired(false)
      .withDescription("Optional, estimate the out-of-bag error").create();
  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(true)
      .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of variables to select randomly at each tree-node").create();
  Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false)
      .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create())
      .withDescription("Optional, seed value used to initialise the Random number generator").create();
  Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false)
      .withDescription("Optional, use the Partial Data implementation").create();
  Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of trees to grow").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  Group group = gbuilder.withName("Options").withOption(oobOpt).withOption(dataOpt).withOption(datasetOpt)
      .withOption(selectionOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt)
      .withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }
    isPartial = cmdLine.hasOption(partialOpt);
    isOob = cmdLine.hasOption(oobOpt);
    String dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString());
    nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
    if (cmdLine.hasOption(seedOpt)) {
      seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
    }
    log.debug("data : {}", dataName);
    log.debug("dataset : {}", datasetName);
    log.debug("m : {}", m);
    log.debug("seed : {}", seed);
    log.debug("nbtrees : {}", nbTrees);
    log.debug("isPartial : {}", isPartial);
    log.debug("isOob : {}", isOob);
    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  buildForest();
  return 0;
}