Example usage for org.apache.commons.cli2.commandline Parser parse

Introduction

This page collects example usage of the parse method of org.apache.commons.cli2.commandline.Parser, drawn from real source files.

Prototype

public CommandLine parse(final String[] arguments) throws OptionException 

Document

Parse the arguments according to the specified options and properties.
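
Before the collected examples, here is a minimal, self-contained sketch of the typical calling pattern. It only uses builder and parser calls that also appear in the examples below; the ParseSketch class name and the input/help option names are illustrative and not taken from any of the quoted projects.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class ParseSketch {
    public static void main(String[] args) {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        // An optional option that takes exactly one argument.
        Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(false)
                .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
                .withDescription("Path to the input data").create();
        // A flag-style option with no argument.
        Option helpOpt = obuilder.withLongName("help").withShortName("h")
                .withDescription("Print out help").create();

        // All options are gathered into a single group that the parser validates against.
        Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(helpOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        // Optionally register the help option with the parser as well, as the
        // InMemoryCollapsedVariationalBayes0 example below does.
        parser.setHelpOption(helpOpt);
        try {
            // parse(String[]) validates the arguments against the group and returns a
            // CommandLine; it throws OptionException for unknown or malformed options.
            CommandLine cmdLine = parser.parse(args);
            if (cmdLine.hasOption(helpOpt)) {
                System.out.println("usage: ParseSketch --input <path>");
                return;
            }
            // getValue(option, default) falls back to the default when the option is absent.
            String input = cmdLine.getValue(inputOpt, "testdata").toString();
            System.out.println("input = " + input);
        } catch (OptionException e) {
            System.err.println("Could not parse command line: " + e.getMessage());
        }
    }
}

Every example below follows this same shape: build Options, collect them into a Group, hand the group to a Parser, call parse, and fall back to printing help when an OptionException is thrown.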

Usage

From source file:org.apache.mahout.clustering.dirichlet.DirichletDriver.java

public static void main(String[] args) throws Exception {
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option kOpt = DefaultOptionCreator.kOption().withRequired(true).create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option alphaOpt = DefaultOptionCreator.alphaOption().create();
    Option modelDistOpt = DefaultOptionCreator.modelDistributionOption().create();
    Option prototypeOpt = DefaultOptionCreator.modelPrototypeOption().create();
    Option numRedOpt = DefaultOptionCreator.numReducersOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(modelDistOpt).withOption(prototypeOpt)
            .withOption(maxIterOpt).withOption(alphaOpt).withOption(kOpt).withOption(helpOpt)
            .withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt)
            .withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        String modelFactory = cmdLine.getValue(modelDistOpt).toString();
        String modelPrototype = cmdLine.getValue(prototypeOpt).toString();
        int numModels = Integer.parseInt(cmdLine.getValue(kOpt).toString());
        int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        double alpha_0 = Double.parseDouble(cmdLine.getValue(alphaOpt).toString());

        runJob(input, output, modelFactory, modelPrototype, numModels, maxIterations, alpha_0, numReducers,
                cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option mOpt = DefaultOptionCreator.mOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option numMapTasksOpt = DefaultOptionCreator.numMappersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt).withOption(overwriteOutput)
            .withOption(helpOpt).withOption(numMapTasksOpt).withOption(numReduceTasksOpt)
            .withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());

        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        int numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
                numReduceTasks, m, cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:org.apache.mahout.clustering.kmeans.KMeansDriver.java

public static void main(String[] args) throws Exception {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option clustersOpt = DefaultOptionCreator.clustersInOption()
            .withDescription(
                    "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
                            + "If k is also specified, then a random set of vectors will be selected"
                            + " and written out to this path first")
            .create();
    Option kOpt = DefaultOptionCreator.kOption()
            .withDescription(
                    "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
                            + " as the Centroid and written to the clusters input path.")
            .create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
            .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(clusteringOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        if (cmdLine.hasOption(kOpt)) {
            clusters = RandomSeedGenerator.buildRandom(input, clusters,
                    Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks,
                cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java

public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault("0.0").create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder
            .withLongName("burnInIterations").withRequired(false).withArgument(abuilder
                    .withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
        int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            conf.set("fs.default.name", dfsNameNode);
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        /*
        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
          cvb0.inferDocuments(0.0, 100, false);
        }
         */

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file:org.apache.mahout.clustering.lda.LDADriver.java

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option topicsOpt = DefaultOptionCreator.numTopicsOption().create();
    Option wordsOpt = DefaultOptionCreator.numWordsOption().create();
    Option topicSmOpt = DefaultOptionCreator.topicSmoothingOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
    Option numReducOpt = DefaultOptionCreator.numReducersOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(topicsOpt).withOption(wordsOpt).withOption(topicSmOpt).withOption(maxIterOpt)
            .withOption(numReducOpt).withOption(overwriteOutput).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReducOpt).toString());
        int numTopics = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
        int numWords = Integer.parseInt(cmdLine.getValue(wordsOpt).toString());
        double topicSmoothing = Double.parseDouble(cmdLine.getValue(topicSmOpt).toString());
        if (topicSmoothing < 1) {
            topicSmoothing = 50.0 / numTopics;
        }

        runJob(input, output, numTopics, numWords, topicSmoothing, maxIterations, numReduceTasks);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();

    Option dictOpt = obuilder.withLongName("dict").withRequired(true)
            .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create())
            .withDescription("Dictionary to read in, in the same format as one created by "
                    + "org.apache.mahout.utils.vectors.lucene.Driver")
            .withShortName("d").create();

    Option outOpt = DefaultOptionCreator.outputOption().create();

    Option wordOpt = obuilder.withLongName("words").withRequired(false)
            .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create())
            .withDescription("Number of words to print").withShortName("w").create();
    Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false)
            .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create())
            .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
            .withOption(inputOpt).withOption(dictTypeOpt).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String input = cmdLine.getValue(inputOpt).toString();
        String dictFile = cmdLine.getValue(dictOpt).toString();
        int numWords = 20;
        if (cmdLine.hasOption(wordOpt)) {
            numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
        }
        Configuration config = new Configuration();

        String dictionaryType = "text";
        if (cmdLine.hasOption(dictTypeOpt)) {
            dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
        }

        List<String> wordList;
        if ("text".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
        } else if ("sequencefile".equals(dictionaryType)) {
            wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
        } else {
            throw new IllegalArgumentException("Invalid dictionary format");
        }

        List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);

        File output = null;
        if (cmdLine.hasOption(outOpt)) {
            output = new File(cmdLine.getValue(outOpt).toString());
            if (!output.exists() && !output.mkdirs()) {
                throw new IOException("Could not create directory: " + output);
            }
        }
        printTopWords(topWords, output);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(group);
        throw e;
    }
}

From source file:org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver.java

public static void main(String[] args) throws IOException {
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option inputIsCanopiesOpt = DefaultOptionCreator.inputIsCanopiesOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option threshold1Opt = DefaultOptionCreator.t1Option().create();
    Option threshold2Opt = DefaultOptionCreator.t2Option().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(measureClassOpt).withOption(helpOpt)
            .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
            .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations,
                cmdLine.hasOption(inputIsCanopiesOpt), cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option vectorOpt = obuilder.withLongName("vector").withRequired(false)
            .withArgument(abuilder.withName("v").withMinimum(1).withMaximum(1).create())
            .withDescription("The vector implementation to use.").withShortName("v").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector")
                .toString();
        runJob(input, output, vectorClassName);
    } catch (OptionException e) {
        InputDriver.LOG.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.canopy.Job.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("input").withRequired(false)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
            .withShortName("i").create();
    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in").withShortName("o").create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();
    // Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
    // abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
    // withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
    // .withShortName("v").create();

    Option t1Opt = obuilder.withLongName("t1").withRequired(false)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1")
            .withShortName("t1").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(false)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2")
            .withShortName("t2").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(measureClassOpt)// .withOption(vectorClassOpt)
            .withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String measureClass = cmdLine
                .getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
                .toString();

        // String className = cmdLine.getValue(vectorClassOpt,
        // "org.apache.mahout.math.RandomAccessSparseVector").toString();
        // Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());

        runJob(input, output, measureClass, t1, t2);
    } catch (OptionException e) {
        Job.log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();

    Option t1Opt = obuilder.withLongName("t1").withRequired(false)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create())
            .withDescription("The t1 value to use.").withShortName("m").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(false)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create())
            .withDescription("The t2 value to use.").withShortName("m").create();
    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
            .withShortName("v").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
            .withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String measureClass = cmdLine
                .getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
                .toString();
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
        double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());

        runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}