Example usage for org.apache.commons.cli2.commandline Parser parse

Introduction

This page collects example usage of the parse method of org.apache.commons.cli2.commandline.Parser, drawn from real source files.

Prototype

public CommandLine parse(final String[] arguments) throws OptionException 

Document

Parse the arguments according to the specified options and properties.
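
Before the collected examples, here is a minimal, self-contained sketch of the typical calling pattern. It only uses builder and parser calls that also appear in the examples below; the ParseSketch class name and the input/help option names are illustrative and not taken from any of the quoted projects.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class ParseSketch {
    public static void main(String[] args) {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        // An optional option that takes exactly one argument.
        Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(false)
                .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
                .withDescription("Path to the input data").create();
        // A flag-style option with no argument.
        Option helpOpt = obuilder.withLongName("help").withShortName("h")
                .withDescription("Print out help").create();

        // All options are gathered into a single group that the parser validates against.
        Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(helpOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        // Optionally register the help option with the parser as well, as the
        // InMemoryCollapsedVariationalBayes0 example below does.
        parser.setHelpOption(helpOpt);
        try {
            // parse(String[]) validates the arguments against the group and returns a
            // CommandLine; it throws OptionException for unknown or malformed options.
            CommandLine cmdLine = parser.parse(args);
            if (cmdLine.hasOption(helpOpt)) {
                System.out.println("usage: ParseSketch --input <path>");
                return;
            }
            // getValue(option, default) falls back to the default when the option is absent.
            String input = cmdLine.getValue(inputOpt, "testdata").toString();
            System.out.println("input = " + input);
        } catch (OptionException e) {
            System.err.println("Could not parse command line: " + e.getMessage());
        }
    }
}

Every example below follows this same shape: build Options, collect them into a Group, hand the group to a Parser, call parse, and fall back to printing help when an OptionException is thrown.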

Usage

From source file:org.apache.mahout.clustering.dirichlet.DirichletDriver.java

public static void main(String[] args) throws Exception {
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option kOpt = DefaultOptionCreator.kOption().withRequired(true).create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option alphaOpt = DefaultOptionCreator.alphaOption().create();
    Option modelDistOpt = DefaultOptionCreator.modelDistributionOption().create();
    Option prototypeOpt = DefaultOptionCreator.modelPrototypeOption().create();
    Option numRedOpt = DefaultOptionCreator.numReducersOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(modelDistOpt).withOption(prototypeOpt)
            .withOption(maxIterOpt).withOption(alphaOpt).withOption(kOpt).withOption(helpOpt)
            .withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt)
            .withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        String modelFactory = cmdLine.getValue(modelDistOpt).toString();
        String modelPrototype = cmdLine.getValue(prototypeOpt).toString();
        int numModels = Integer.parseInt(cmdLine.getValue(kOpt).toString());
        int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        double alpha_0 = Double.parseDouble(cmdLine.getValue(alphaOpt).toString());

        runJob(input, output, modelFactory, modelPrototype, numModels, maxIterations, alpha_0, numReducers,
                cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option mOpt = DefaultOptionCreator.mOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option numMapTasksOpt = DefaultOptionCreator.numMappersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt).withOption(overwriteOutput)
            .withOption(helpOpt).withOption(numMapTasksOpt).withOption(numReduceTasksOpt)
            .withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());

        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        int numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
                numReduceTasks, m, cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:org.apache.mahout.clustering.kmeans.KMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(clusteringOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks,
                cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java

public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault("0.0").create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder
            .withLongName("burnInIterations").withRequired(false).withArgument(abuilder
                    .withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
        int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            conf.set("fs.default.name", dfsNameNode);
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        /*
        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, false);
        }
         */

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file:org.apache.mahout.clustering.lda.LDADriver.java

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option topicsOpt = DefaultOptionCreator.numTopicsOption().create();
    Option wordsOpt = DefaultOptionCreator.numWordsOption().create();
    Option topicSmOpt = DefaultOptionCreator.topicSmoothingOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
    Option numReducOpt = DefaultOptionCreator.numReducersOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(topicsOpt).withOption(wordsOpt).withOption(topicSmOpt).withOption(maxIterOpt)
            .withOption(numReducOpt).withOption(overwriteOutput).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReducOpt).toString());
        int numTopics = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
        int numWords = Integer.parseInt(cmdLine.getValue(wordsOpt).toString());
        double topicSmoothing = Double.parseDouble(cmdLine.getValue(topicSmOpt).toString());
        if (topicSmoothing < 1) {
            topicSmoothing = 50.0 / numTopics;
        }

        runJob(input, output, numTopics, numWords, topicSmoothing, maxIterations, numReduceTasks);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();

    Option dictOpt = obuilder.withLongName("dict").withRequired(true)
            .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create())
            .withDescription("Dictionary to read in, in the same format as one created by "
                    + "org.apache.mahout.utils.vectors.lucene.Driver")
            .withShortName("d").create();

    Option outOpt = DefaultOptionCreator.outputOption().create();

    Option wordOpt = obuilder.withLongName("words").withRequired(false)
            .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create())
            .withDescription("Number of words to print").withShortName("w").create();
    Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false)
            .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create())
            .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
            .withOption(inputOpt).withOption(dictTypeOpt).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String input = cmdLine.getValue(inputOpt).toString();
        String dictFile = cmdLine.getValue(dictOpt).toString();
        int numWords = 20;
        if (cmdLine.hasOption(wordOpt)) {
            numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
        }
        Configuration config = new Configuration();

        String dictionaryType = "text";
        if (cmdLine.hasOption(dictTypeOpt)) {
            dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
        }

        List<String> wordList;
        if ("text".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
        } else if ("sequencefile".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
        } else {
            throw new IllegalArgumentException("Invalid dictionary format");
        }

        List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);

        File output = null;
        if (cmdLine.hasOption(outOpt)) {
            output = new File(cmdLine.getValue(outOpt).toString());
            if (!output.exists() && !output.mkdirs()) {
                throw new IOException("Could not create directory: " + output);
            }
        }
        printTopWords(topWords, output);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(group);
        throw e;
    }
}

From source file:org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver.java

public static void main(String[] args) throws IOException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option inputIsCanopiesOpt = DefaultOptionCreator.inputIsCanopiesOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option threshold1Opt = DefaultOptionCreator.t1Option().create();
    Option threshold2Opt = DefaultOptionCreator.t2Option().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(measureClassOpt).withOption(helpOpt)
            .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
            .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations,
                cmdLine.hasOption(inputIsCanopiesOpt), cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option vectorOpt = obuilder.withLongName("vector").withRequired(false)
            .withArgument(abuilder.withName("v").withMinimum(1).withMaximum(1).create())
            .withDescription("The vector implementation to use.").withShortName("v").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector")
                .toString();
        runJob(input, output, vectorClassName);
    } catch (OptionException e) {
        InputDriver.LOG.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.canopy.Job.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("input").withRequired(false)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
            .withShortName("i").create();
    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in").withShortName("o").create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();
    // Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
    // abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
    // withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
    // .withShortName("v").create();

    Option t1Opt = obuilder.withLongName("t1").withRequired(false)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1")
            .withShortName("t1").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(false)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2")
            .withShortName("t2").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(measureClassOpt)// .withOption(vectorClassOpt)
            .withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String measureClass = cmdLine
                .getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
                .toString();

        // String className = cmdLine.getValue(vectorClassOpt,
        // "org.apache.mahout.math.RandomAccessSparseVector").toString();
        // Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());

        runJob(input, output, measureClass, t1, t2);
    } catch (OptionException e) {
        Job.log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();

    Option t1Opt = obuilder.withLongName("t1").withRequired(false)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create())
            .withDescription("The t1 value to use.").withShortName("m").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(false)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create())
            .withDescription("The t2 value to use.").withShortName("m").create();
    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
            .withShortName("v").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
            .withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String measureClass = cmdLine
                .getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
                .toString();
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());

        runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}