Example usage for org.apache.commons.cli2 CommandLine getValue

Introduction

This page collects example usages of org.apache.commons.cli2's CommandLine.getValue, drawn from the Apache Mahout source tree.

Prototype

Object getValue(final Option option) throws IllegalStateException;

Document

Retrieves the single Argument value associated with the specified Option.
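
Before the full Mahout examples, here is a minimal, self-contained sketch of the call pattern; the --input option, class name, and printed message are illustrative, not taken from the sources below. Note that getValue returns Object, so callers either cast the result or call toString() on it, as the examples that follow do.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class GetValueSketch {
    public static void main(String[] args) throws OptionException {
        // A required --input option taking exactly one argument.
        Option inputOpt = new DefaultOptionBuilder().withLongName("input").withShortName("i")
                .withRequired(true)
                .withArgument(new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
                .withDescription("Input path").create();

        Group group = new GroupBuilder().withName("Options").withOption(inputOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args); // throws OptionException on bad arguments

        // getValue retrieves the single argument value bound to the option.
        String input = cmdLine.getValue(inputOpt).toString();
        System.out.println("input = " + input);
    }
}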

Usage

From source file: org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option mOpt = DefaultOptionCreator.mOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option numMapTasksOpt = DefaultOptionCreator.numMappersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt).withOption(overwriteOutput)
            .withOption(helpOpt).withOption(numMapTasksOpt).withOption(numReduceTasksOpt)
            .withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());

        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        int numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
                numReduceTasks, m, cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file: org.apache.mahout.clustering.kmeans.KMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(clusteringOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks,
                cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file: org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java

public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault("0.0").create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder
            .withLongName("burnInIterations").withRequired(false).withArgument(abuilder
                    .withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
        int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            conf.set("fs.default.name", dfsNameNode);
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        /*
        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, false);
        }
         */

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file: org.apache.mahout.clustering.lda.LDADriver.java

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option topicsOpt = DefaultOptionCreator.numTopicsOption().create();
    Option wordsOpt = DefaultOptionCreator.numWordsOption().create();
    Option topicSmOpt = DefaultOptionCreator.topicSmoothingOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
    Option numReducOpt = DefaultOptionCreator.numReducersOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(topicsOpt).withOption(wordsOpt).withOption(topicSmOpt).withOption(maxIterOpt)
            .withOption(numReducOpt).withOption(overwriteOutput).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReducOpt).toString());
        int numTopics = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
        int numWords = Integer.parseInt(cmdLine.getValue(wordsOpt).toString());
        double topicSmoothing = Double.parseDouble(cmdLine.getValue(topicSmOpt).toString());
        if (topicSmoothing < 1) {
            topicSmoothing = 50.0 / numTopics;
        }

        runJob(input, output, numTopics, numWords, topicSmoothing, maxIterations, numReduceTasks);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file: org.apache.mahout.clustering.lda.LDAPrintTopics.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();

    Option dictOpt = obuilder.withLongName("dict").withRequired(true)
            .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create())
            .withDescription("Dictionary to read in, in the same format as one created by "
                    + "org.apache.mahout.utils.vectors.lucene.Driver")
            .withShortName("d").create();

    Option outOpt = DefaultOptionCreator.outputOption().create();

    Option wordOpt = obuilder.withLongName("words").withRequired(false)
            .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create())
            .withDescription("Number of words to print").withShortName("w").create();
    Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false)
            .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create())
            .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
            .withOption(inputOpt).withOption(dictTypeOpt).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String input = cmdLine.getValue(inputOpt).toString();
        String dictFile = cmdLine.getValue(dictOpt).toString();
        int numWords = 20;
        if (cmdLine.hasOption(wordOpt)) {
            numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
        }
        Configuration config = new Configuration();

        String dictionaryType = "text";
        if (cmdLine.hasOption(dictTypeOpt)) {
            dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
        }

        List<String> wordList;
        if ("text".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
        } else if ("sequencefile".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
        } else {
            throw new IllegalArgumentException("Invalid dictionary format");
        }

        List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);

        File output = null;
        if (cmdLine.hasOption(outOpt)) {
            output = new File(cmdLine.getValue(outOpt).toString());
            if (!output.exists() && !output.mkdirs()) {
                throw new IOException("Could not create directory: " + output);
            }
        }
        printTopWords(topWords, output);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(group);
        throw e;
    }
}

From source file: org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver.java

public static void main(String[] args) throws IOException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option inputIsCanopiesOpt = DefaultOptionCreator.inputIsCanopiesOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option threshold1Opt = DefaultOptionCreator.t1Option().create();
    Option threshold2Opt = DefaultOptionCreator.t2Option().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(measureClassOpt).withOption(helpOpt)
            .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
            .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations,
                cmdLine.hasOption(inputIsCanopiesOpt), cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file: org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer.java

private boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get seq files with the vectors (training set)").create();

    Option testInputFileOption = builder.withLongName("testInput").withShortName("itest")
            .withArgument(argumentBuilder.withName("testInput").withMaximum(1).create())
            .withDescription("where to get seq files with the vectors (test set)").create();

    Option centroidsFileOption = builder.withLongName("centroids").withShortName("c").withRequired(true)
            .withArgument(argumentBuilder.withName("centroids").withMaximum(1).create())
            .withDescription(
                    "where to get seq files with the centroids (from Mahout KMeans or StreamingKMeansDriver)")
            .create();

    Option centroidsCompareFileOption = builder.withLongName("centroidsCompare").withShortName("cc")
            .withRequired(false)
            .withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
            .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
                    + "StreamingKMeansDriver)")
            .create();

    Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to dump the CSV file with the results").create();

    Option mahoutKMeansFormatOption = builder.withLongName("mahoutkmeansformat").withShortName("mkm")
            .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
            .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();

    Option mahoutKMeansCompareFormatOption = builder.withLongName("mahoutkmeansformatCompare")
            .withShortName("mkmc").withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
            .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
            .withOption(testInputFileOption).withOption(outputFileOption).withOption(centroidsFileOption)
            .withOption(centroidsCompareFileOption).withOption(mahoutKMeansFormatOption)
            .withOption(mahoutKMeansCompareFormatOption).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 150));

    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }

    trainFile = (String) cmdLine.getValue(inputFileOption);
    if (cmdLine.hasOption(testInputFileOption)) {
        testFile = (String) cmdLine.getValue(testInputFileOption);
    }
    centroidFile = (String) cmdLine.getValue(centroidsFileOption);
    if (cmdLine.hasOption(centroidsCompareFileOption)) {
        centroidCompareFile = (String) cmdLine.getValue(centroidsCompareFileOption);
    }
    outputFile = (String) cmdLine.getValue(outputFileOption);
    if (cmdLine.hasOption(mahoutKMeansFormatOption)) {
        mahoutKMeansFormat = true;
    }
    if (cmdLine.hasOption(mahoutKMeansCompareFormatOption)) {
        mahoutKMeansFormatCompare = true;
    }
    return true;
}

From source file: org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles.java

private boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription(
                    "what the base folder for sequence files is (they must all have the same key/value type)")
            .create();

    Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription(
                    "the base name of the files that the input will be split into; the i-th split has the "
                            + "suffix -i")
            .create();

    Option numSplitsOption = builder.withLongName("numSplits").withShortName("ns").withRequired(true)
            .withArgument(argumentBuilder.withName("numSplits").withMaximum(1).create())
            .withDescription("how many splits to use for the given files").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
            .withOption(outputFileOption).withOption(numSplitsOption).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = (String) cmdLine.getValue(inputFileOption);
    outputFileBase = (String) cmdLine.getValue(outputFileOption);
    numSplits = Integer.parseInt((String) cmdLine.getValue(numSplitsOption));
    return true;
}

From source file: org.apache.mahout.df.BreimanExample.java

@Override
public int run(String[] args) throws IOException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
            .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of trees to grow, each iteration").create();

    Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
            .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of times to repeat the test").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
            .withOption(nbtreesOpt).withOption(helpOpt).create();

    Path dataPath;
    Path datasetPath;
    int nbTrees;
    int nbIterations;

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
        nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);

    Random rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < nbIterations; iteration++) {
        log.info("Iteration {}", iteration);
        runIteration(rng, data, m, nbTrees);
    }

    log.info("********************************************");
    log.info("Selection error : {}", sumTestErr / nbIterations);
    log.info("Single Input error : {}", sumOneErr / nbIterations);
    log.info("One Tree error : {}", sumTreeErr / nbIterations);
    log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
    log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
    log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
    log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

    return 0;
}

From source file: org.apache.mahout.df.mapred.BuildForest.java

@Override
public int run(String[] args) throws IOException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option oobOpt = obuilder.withShortName("oob").withRequired(false)
            .withDescription("Optional, estimate the out-of-bag error").create();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(true)
            .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of variables to select randomly at each tree-node").create();

    Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false)
            .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create())
            .withDescription("Optional, seed value used to initialise the Random number generator").create();

    Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false)
            .withDescription("Optional, use the Partial Data implementation").create();

    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
            .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of trees to grow").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(oobOpt).withOption(dataOpt).withOption(datasetOpt)
            .withOption(selectionOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        isPartial = cmdLine.hasOption(partialOpt);
        isOob = cmdLine.hasOption(oobOpt);
        String dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString());
        nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());

        if (cmdLine.hasOption(seedOpt)) {
            seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
        }

        log.debug("data : {}", dataName);
        log.debug("dataset : {}", datasetName);
        log.debug("m : {}", m);
        log.debug("seed : {}", seed);
        log.debug("nbtrees : {}", nbTrees);
        log.debug("isPartial : {}", isPartial);
        log.debug("isOob : {}", isOob);

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);

    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    buildForest();

    return 0;
}