Example usage for org.apache.commons.cli2.commandline Parser parse

List of usage examples for org.apache.commons.cli2.commandline Parser parse

Introduction

This page collects usage examples for the parse method of org.apache.commons.cli2.commandline.Parser.

Prototype

public CommandLine parse(final String[] arguments) throws OptionException 

Source Link

Document

Parse the arguments according to the specified options and properties.

Usage

From source file:org.apache.mahout.utils.vectors.lucene.Driver.java

/**
 * Command-line entry point: declares the supported options, parses {@code args}, copies the
 * supplied values onto a new {@link Driver} and calls {@code dumpVectors()} on it.
 *
 * <p>{@code --dir}, {@code --output}, {@code --field} and {@code --dictOut} are declared as
 * required; every other option is optional and is only applied when present. When
 * {@code --help} is given the usage text is printed and nothing else is done. When parsing
 * fails, the error is logged and the usage text is printed.
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the signature; presumably raised while dumping the vectors
 */
public static void main(String[] args) throws IOException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output file").withShortName("o").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option idFieldOpt = obuilder.withLongName("idField").withRequired(false)
            .withArgument(abuilder.withName("idField").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The field in the index containing the index.  If null, then the Lucene internal doc "
                            + "id is used which is prone to error if the underlying index changes")
            .create();

    Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true)
            .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create())
            .withDescription("The output of the dictionary").withShortName("t").create();

    Option seqDictOutOpt = obuilder.withLongName("seqDictOut").withRequired(false)
            .withArgument(abuilder.withName("seqDictOut").withMinimum(1).withMaximum(1).create())
            .withDescription("The output of the dictionary as sequence file").withShortName("st").create();

    Option weightOpt = obuilder.withLongName("weight").withRequired(false)
            .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
            .withDescription("The kind of weight to use. Currently TF or TFIDF").withShortName("w").create();

    Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false)
            .withArgument(abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create())
            .withDescription("The delimiter for outputting the dictionary").withShortName("l").create();

    Option powerOpt = obuilder.withLongName("norm").withRequired(false)
            .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The norm to use, expressed as either a double or \"INF\" if you want to use the Infinite norm.  "
                            + "Must be greater or equal to 0.  The default is not to normalize")
            .withShortName("n").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The maximum number of vectors to output.  If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
            .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
            .withDescription("The minimum document frequency.  Default is 1").withShortName("md").create();

    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
            .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The max percentage of docs for the DF.  Can be used to remove really high frequency terms."
                            + "  Expressed as an integer between 0 and 100. Default is 99.")
            .withShortName("x").create();

    Option maxPercentErrorDocsOpt = obuilder.withLongName("maxPercentErrorDocs").withRequired(false)
            .withArgument(abuilder.withName("maxPercentErrorDocs").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The max percentage of docs that can have a null term vector. These are noise document and can occur if the "
                            + "analyzer used strips out all terms in the target field. This percentage is expressed as a value "
                            + "between 0 and 1. The default is 0.")
            .withShortName("err").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(idFieldOpt).withOption(outputOpt)
            .withOption(delimiterOpt).withOption(helpOpt).withOption(fieldOpt).withOption(maxOpt)
            .withOption(dictOutOpt).withOption(seqDictOutOpt).withOption(powerOpt).withOption(maxDFPercentOpt)
            .withOption(weightOpt).withOption(minDFOpt).withOption(maxPercentErrorDocsOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        // Register the help option with the parser so that "--help" on its own is accepted:
        // without this the parser validates the required options first and a bare help
        // request is reported as a parse error instead of reaching the help branch below.
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {

            CommandLineUtil.printHelp(group);
            return;
        }

        if (cmdLine.hasOption(inputOpt)) { // Lucene case
            Driver luceneDriver = new Driver();
            luceneDriver.setLuceneDir(cmdLine.getValue(inputOpt).toString());

            if (cmdLine.hasOption(maxOpt)) {
                luceneDriver.setMaxDocs(Long.parseLong(cmdLine.getValue(maxOpt).toString()));
            }

            if (cmdLine.hasOption(weightOpt)) {
                luceneDriver.setWeightType(cmdLine.getValue(weightOpt).toString());
            }

            luceneDriver.setField(cmdLine.getValue(fieldOpt).toString());

            if (cmdLine.hasOption(minDFOpt)) {
                luceneDriver.setMinDf(Integer.parseInt(cmdLine.getValue(minDFOpt).toString()));
            }

            if (cmdLine.hasOption(maxDFPercentOpt)) {
                luceneDriver.setMaxDFPercent(Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString()));
            }

            if (cmdLine.hasOption(powerOpt)) {
                // "INF" selects the infinite norm; anything else must parse as a double.
                String power = cmdLine.getValue(powerOpt).toString();
                if ("INF".equals(power)) {
                    luceneDriver.setNorm(Double.POSITIVE_INFINITY);
                } else {
                    luceneDriver.setNorm(Double.parseDouble(power));
                }
            }

            if (cmdLine.hasOption(idFieldOpt)) {
                luceneDriver.setIdField(cmdLine.getValue(idFieldOpt).toString());
            }

            if (cmdLine.hasOption(maxPercentErrorDocsOpt)) {
                luceneDriver.setMaxPercentErrorDocs(
                        Double.parseDouble(cmdLine.getValue(maxPercentErrorDocsOpt).toString()));
            }

            luceneDriver.setOutFile(cmdLine.getValue(outputOpt).toString());

            // The dictionary delimiter falls back to a tab when not supplied.
            luceneDriver.setDelimiter(
                    cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString() : "\t");

            luceneDriver.setDictOut(cmdLine.getValue(dictOutOpt).toString());

            if (cmdLine.hasOption(seqDictOutOpt)) {
                luceneDriver.setSeqDictOut(cmdLine.getValue(seqDictOutOpt).toString());
            }

            luceneDriver.dumpVectors();
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.utils.vectors.lucene.SeqFilePrint.java

/**
 * Entry point: reads the {@code inputFile} and {@code outFile} options, passes whichever are
 * present to a new {@link SeqFilePrint} and then invokes its {@code run} method with the
 * original arguments.
 *
 * <p>Any exception thrown by {@code run} is logged at SEVERE level rather than propagated.
 *
 * @param args raw command-line arguments
 * @throws OptionException if the arguments cannot be parsed against the declared options
 */
public static void main(String[] args) throws OptionException {
    DefaultOptionBuilder optionFactory = new DefaultOptionBuilder();
    ArgumentBuilder argumentFactory = new ArgumentBuilder();
    GroupBuilder groupFactory = new GroupBuilder();

    // Required option naming the sequence file to read.
    Option sourceOption = optionFactory
            .withLongName("inputFile")
            .withRequired(true)
            .withArgument(argumentFactory.withName("inputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The output of the dictionary as sequence file")
            .withShortName("inputFile")
            .create();

    // Required option naming where the result is written.
    Option targetOption = optionFactory
            .withLongName("outFile")
            .withRequired(true)
            .withArgument(argumentFactory.withName("outfolder").withMinimum(1).withMaximum(1).create())
            .withDescription("The output of the dictionary as sequence file")
            .withShortName("outFile")
            .create();

    Group optionGroup = groupFactory
            .withName("Options")
            .withOption(sourceOption)
            .withOption(targetOption)
            .create();

    SeqFilePrint printer = new SeqFilePrint();
    Parser cliParser = new Parser();
    cliParser.setGroup(optionGroup);
    CommandLine parsed = cliParser.parse(args);

    if (parsed.hasOption(sourceOption)) {
        String sourcePath = parsed.getValue(sourceOption).toString();
        printer.setInputSeqFile(sourcePath);
    }
    if (parsed.hasOption(targetOption)) {
        String targetPath = parsed.getValue(targetOption).toString();
        printer.setOutFile(targetPath);
    }

    try {
        printer.run(args);
    } catch (Exception failure) {
        Logger severeLog = Logger.getLogger(SeqFilePrint.class.getName());
        severeLog.log(Level.SEVERE, null, failure);
    }
}

From source file:org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles.java

/**
 * Parses the command-line options, tokenizes the documents found in the input directory and
 * writes term-frequency vectors (and, unless the weight is set to TF, TF-IDF vectors) under
 * the output directory, optionally pruning high document-frequency terms on the way.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when help was requested; {@code 0} otherwise, including the case where an
 *         {@link OptionException} was caught and the help text was printed
 * @throws Exception declared by the signature; anything other than {@link OptionException}
 *         raised inside the method propagates to the caller
 */
@Override
public int run(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();

    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option minSupportOpt = obuilder.withLongName("minSupport")
            .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create();

    Option analyzerNameOpt = obuilder.withLongName("analyzerName")
            .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create())
            .withDescription("The class name of the analyzer").withShortName("a").create();

    Option chunkSizeOpt = obuilder.withLongName("chunkSize")
            .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in MegaBytes. Default Value: 100MB").withShortName("chunk")
            .create();

    Option weightOpt = obuilder.withLongName("weight").withRequired(false)
            .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
            .withDescription("The kind of weight to use. Currently TF or TFIDF. Default: TFIDF")
            .withShortName("wt").create();

    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
            .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
            .withDescription("The minimum document frequency.  Default is 1").withShortName("md").create();

    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
            .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The max percentage of docs for the DF.  Can be used to remove really high frequency terms."
                            + " Expressed as an integer between 0 and 100. Default is 99.  If maxDFSigma is also set, "
                            + "it will override this value.")
            .withShortName("x").create();

    Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false)
            .withArgument(abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
                            + "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
                            + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
                            + "than 0 no vectors will be filtered out. Default is -1.0.  Overrides maxDFPercent")
            .withShortName("xs").create();

    Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false)
            .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional)The minimum Log Likelihood Ratio(Float)  Default is "
                    + LLRReducer.DEFAULT_MIN_LLR)
            .withShortName("ml").create();

    Option numReduceTasksOpt = obuilder.withLongName("numReducers")
            .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr")
            .create();

    Option powerOpt = obuilder.withLongName("norm").withRequired(false)
            .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm.  "
                            + "Must be greater or equal to 0.  The default is not to normalize")
            .withShortName("n").create();

    // The next options take no argument: their mere presence switches the feature on.
    Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be logNormalize. If set true else false")
            .withShortName("lnorm").create();

    Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false)
            .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) The maximum size of ngrams to create"
                    + " (2 = bigrams, 3 = trigrams, etc) Default Value:1")
            .withShortName("ng").create();

    Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false)
            .withDescription(
                    "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false")
            .withShortName("seq").create();

    Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be NamedVectors. If set true else false")
            .withShortName("nv").create();

    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false)
            .withDescription("If set, overwrite the output directory").withShortName("ow").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt)
            .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt)
            .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt)
            .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(sequentialAccessVectorOpt)
            .withOption(namedVectorOpt).withOption(logNormalizeOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt));
        Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt));

        // Each tunable starts at its documented default and is overridden only when supplied.
        int chunkSize = 100;
        if (cmdLine.hasOption(chunkSizeOpt)) {
            chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
        }
        int minSupport = 2;
        if (cmdLine.hasOption(minSupportOpt)) {
            String minSupportString = (String) cmdLine.getValue(minSupportOpt);
            minSupport = Integer.parseInt(minSupportString);
        }

        int maxNGramSize = 1;

        if (cmdLine.hasOption(maxNGramSizeOpt)) {
            // Unlike the other numeric options, a malformed n-gram size is only warned about
            // and the default of 1 is kept.
            try {
                maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
            } catch (NumberFormatException ex) {
                log.warn("Could not parse ngram size option");
            }
        }
        log.info("Maximum n-gram size is: {}", maxNGramSize);

        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.delete(getConf(), outputDir);
        }

        float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
        if (cmdLine.hasOption(minLLROpt)) {
            minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
        }
        log.info("Minimum LLR value: {}", minLLRValue);

        int reduceTasks = 1;
        if (cmdLine.hasOption(numReduceTasksOpt)) {
            reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        }
        log.info("Number of reduce tasks: {}", reduceTasks);

        Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
        if (cmdLine.hasOption(analyzerNameOpt)) {
            String className = cmdLine.getValue(analyzerNameOpt).toString();
            analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
            // try instantiating it, b/c there isn't any point in setting it if
            // you can't instantiate it
            AnalyzerUtils.createAnalyzer(analyzerClass);
        }

        // Weighting defaults to TF-IDF; any value other than "tf" or "tfidf" (case-insensitive)
        // is rejected as an option error.
        boolean processIdf;

        if (cmdLine.hasOption(weightOpt)) {
            String wString = cmdLine.getValue(weightOpt).toString();
            if ("tf".equalsIgnoreCase(wString)) {
                processIdf = false;
            } else if ("tfidf".equalsIgnoreCase(wString)) {
                processIdf = true;
            } else {
                throw new OptionException(weightOpt);
            }
        } else {
            processIdf = true;
        }

        int minDf = 1;
        if (cmdLine.hasOption(minDFOpt)) {
            minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
        }
        int maxDFPercent = 99;
        if (cmdLine.hasOption(maxDFPercentOpt)) {
            maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
        }
        double maxDFSigma = -1.0;
        if (cmdLine.hasOption(maxDFSigmaOpt)) {
            maxDFSigma = Double.parseDouble(cmdLine.getValue(maxDFSigmaOpt).toString());
        }

        float norm = PartialVectorMerger.NO_NORMALIZING;
        if (cmdLine.hasOption(powerOpt)) {
            // "INF" selects the infinite norm; anything else must parse as a float.
            String power = cmdLine.getValue(powerOpt).toString();
            if ("INF".equals(power)) {
                norm = Float.POSITIVE_INFINITY;
            } else {
                norm = Float.parseFloat(power);
            }
        }

        boolean logNormalize = false;
        if (cmdLine.hasOption(logNormalizeOpt)) {
            logNormalize = true;
        }
        log.info("Tokenizing documents in {}", inputDir);
        Configuration conf = getConf();
        Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER);
        //TODO: move this into DictionaryVectorizer , and then fold SparseVectorsFrom with EncodedVectorsFrom
        // to have one framework for all of this.
        DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath, conf);

        boolean sequentialAccessOutput = false;
        if (cmdLine.hasOption(sequentialAccessVectorOpt)) {
            sequentialAccessOutput = true;
        }

        boolean namedVectors = false;
        if (cmdLine.hasOption(namedVectorOpt)) {
            namedVectors = true;
        }
        // With the default maxDFPercent of 99 this is true; pruning is skipped only when the
        // caller passes maxDFPercent <= 0 and leaves maxDFSigma negative.
        boolean shouldPrune = maxDFSigma >= 0.0 || maxDFPercent > 0.00;
        // When pruning, the raw TF vectors go to a "-toprune" directory so that the pruned
        // result can later be written to the regular TF output folder.
        String tfDirName = shouldPrune ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune"
                : DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER;
        log.info("Creating Term Frequency Vectors");
        if (processIdf) {
            // For TF-IDF the norm/logNormalize settings are not passed at the TF stage
            // (-1.0f and false are passed instead); they are handed to processTfIdf below.
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, -1.0f, false, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        } else {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, norm, logNormalize, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        }

        Pair<Long[], List<Path>> docFrequenciesFeatures = null;
        // Should document frequency features be processed
        if (shouldPrune || processIdf) {
            log.info("Calculating IDF");
            docFrequenciesFeatures = TFIDFConverter.calculateDF(new Path(outputDir, tfDirName), outputDir, conf,
                    chunkSize);
        }

        long maxDF = maxDFPercent; //if we are pruning by std dev, then this will get changed
        if (shouldPrune) {
            // NOTE(review): index 1 of the first pair element is used as the vector count,
            // going by the variable name — confirm against TFIDFConverter.calculateDF.
            long vectorCount = docFrequenciesFeatures.getFirst()[1];
            if (maxDFSigma >= 0.0) {
                Path dfDir = new Path(outputDir, TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER);
                Path stdCalcDir = new Path(outputDir, HighDFWordsPruner.STD_CALC_DIR);

                // Calculate the standard deviation
                double stdDev = BasicStats.stdDevForGivenMean(dfDir, stdCalcDir, 0.0, conf);
                // Re-express the sigma-based limit as a percentage of vectorCount, replacing
                // the maxDFPercent value.
                maxDF = (int) (100.0 * maxDFSigma * stdDev / vectorCount);
            }

            // Turn the percentage back into an absolute document-frequency threshold.
            long maxDFThreshold = (long) (vectorCount * (maxDF / 100.0f));

            // Prune the term frequency vectors
            Path tfDir = new Path(outputDir, tfDirName);
            Path prunedTFDir = new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER);
            Path prunedPartialTFDir = new Path(outputDir,
                    DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-partial");
            log.info("Pruning");
            if (processIdf) {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, -1.0f, false, reduceTasks);
            } else {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, norm, logNormalize, reduceTasks);
            }
            // The un-pruned "-toprune" vectors are no longer needed once pruning has run.
            HadoopUtil.delete(new Configuration(conf), tfDir);
        }
        if (processIdf) {
            TFIDFConverter.processTfIdf(new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
                    outputDir, conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize,
                    sequentialAccessOutput, namedVectors, reduceTasks);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
    // NOTE(review): 0 is also returned after an option error was reported above.
    return 0;
}

From source file:org.opencloudengine.flamingo.mapreduce.core.AbstractJob.java

/**
 * Parses the command-line arguments against the options registered on this job.
 *
 * <p>Before parsing, the help option and the {@code tempDir}, {@code startPhase} and
 * {@code endPhase} options are registered. Usage help is printed and {@code null} is returned
 * when parsing fails, when the help option is present, or when {@code parseDirectories}
 * rejects the command line with an {@link IllegalArgumentException}.
 *
 * @param args the raw command-line arguments
 * @return the parsed arguments as a key-sorted {@code Map<String,String>}, or {@code null} when
 *         help was printed instead. The previous comment indicates that keys are the option
 *         names prefixed with '--' — confirm against {@code maybePut}.
 * @throws Exception declared by the signature; option and directory errors are handled here
 */
public Map<String, String> parseArguments(String[] args) throws Exception {
    Option helpOpt = addOption(DefaultOptionCreator.helpOption());
    // NOTE(review): the description strings below are blank in this copy of the file
    // (apparently lost to a character-encoding problem); they are kept as-is.
    addOption("tempDir", null, " ", false);
    addOption("startPhase", null, "  ", "0");
    addOption("endPhase", null, "  ", String.valueOf(Integer.MAX_VALUE));

    GroupBuilder groupBuilder = new GroupBuilder().withName("Hadoop MapReduce Job :");

    for (Option opt : options) {
        groupBuilder = groupBuilder.withOption(opt);
    }

    Group group = groupBuilder.create();

    CommandLine cmdLine;
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        cmdLine = parser.parse(args);
    } catch (OptionException e) {
        log.error(e.getMessage());
        CommandLineUtil.printHelpWithGenericOptions(group, e);
        return null;
    }

    if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelpWithGenericOptions(group);
        return null;
    }

    try {
        parseDirectories(cmdLine);
    } catch (IllegalArgumentException e) {
        log.error(e.getMessage());
        CommandLineUtil.printHelpWithGenericOptions(group);
        return null;
    }

    argMap = new TreeMap<String, String>();
    maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));

    // The header line has no "{}" placeholder, so the map that used to be passed here was
    // silently discarded by the logger; the entries are listed one per line below instead.
    log.info("Command line arguments: ");
    for (Map.Entry<String, String> entry : argMap.entrySet()) {
        log.info("   {} = {}", entry.getKey(), entry.getValue());
    }
    return argMap;
}

From source file:org.pharmgkb.util.CliHelper.java

/**
 * Parses arguments./*from   ww w .j  ava2  s .  co m*/
 */
public void parse(String[] args) throws OptionException {

    m_options = m_groupBuilder.create();
    Parser parser = new Parser();
    parser.setGroup(m_options);
    parser.setHelpOption(m_helpOption);
    m_commandLine = parser.parse(args);
}

From source file:parse_wikipedia.ParseWikipedia.java

/**
 * Entry point: parses the input and output options and passes their values to
 * {@code runJob}.
 *
 * <p>Option-parsing errors, interruption and class-loading failures are logged rather than
 * propagated; on interruption the thread's interrupt flag is restored.
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the signature; presumably raised by {@code runJob}
 */
public static void main(String[] args) throws IOException {
    GroupBuilder gbuilder = new GroupBuilder();

    Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
    Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

    Group group = gbuilder.withName("Options").withOption(dirInputPathOpt).withOption(dirOutputPathOpt)
            .create();

    Parser parser = new Parser();
    parser.setGroup(group);

    try {
        CommandLine cmdLine = parser.parse(args);

        String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
        String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

        runJob(inputPath, outputPath);
    } catch (InterruptedException e) {
        log.error("Exception", e);
        // Catching InterruptedException clears the interrupt status; restore it so that
        // code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
    } catch (OptionException | ClassNotFoundException e) {
        log.error("Exception", e);
    }

}

From source file:tk.summerway.mahout9.tools.MyClusterDumper.java

/**
 * Declares the cluster-dumper options, parses {@code args} and copies the supplied values
 * onto this object's fields, logging each resulting value.
 *
 * <p>Input and output locations start from {@code getInputPath()}/{@code getInputFile()} and
 * {@code getOutputPath()}/{@code getOutputFile()} and are overridden when the corresponding
 * option is present. Other fields are only assigned when their option is supplied, except
 * the output format (defaults to TEXT), the per-cluster point limit (defaults to
 * {@code Long.MAX_VALUE}) and the evaluation flag, which are always set.
 *
 * @param args raw command-line arguments
 * @return {@code true} when the arguments were parsed and applied; {@code false} when help
 *         was requested or the arguments could not be parsed (help is printed in both cases)
 */
private boolean buildParse(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();
    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option outputFormatOpt = obuilder.withLongName(OUTPUT_FORMAT_OPT)
            .withArgument(abuilder.withName(OUTPUT_FORMAT_OPT).create())
            .withDescription(
                    "The optional output format for the results. Options: TEXT, CSV, JSON or GRAPH_ML. Default is TEXT")
            .withShortName("of").create();

    Option substringOpt = obuilder.withLongName(SUBSTRING_OPTION)
            .withArgument(abuilder.withName(SUBSTRING_OPTION).create())
            .withDescription("The number of chars of the asFormatString() to print").withShortName("b")
            .create();

    Option pointsDirOpt = obuilder.withLongName(POINTS_DIR_OPTION)
            .withArgument(abuilder.withName(POINTS_DIR_OPTION).create())
            .withDescription(
                    "The directory containing points sequence files mapping input vectors to their cluster. "
                            + "If specified, then the program will output the points associated with a cluster")
            .withShortName("p").create();

    Option samplePointsOpt = obuilder.withLongName(SAMPLE_POINTS)
            .withArgument(abuilder.withName(SAMPLE_POINTS).create())
            .withDescription("Specifies the maximum number of points to include _per_ cluster.  The default "
                    + "is to include all points")
            .withShortName("sp").create();

    Option dictionaryOpt = obuilder.withLongName(DICTIONARY_OPTION)
            .withArgument(abuilder.withName(DICTIONARY_OPTION).create()).withDescription("The dictionary file")
            .withShortName("d").create();

    Option dictionaryTypeOpt = obuilder.withLongName(DICTIONARY_TYPE_OPTION)
            .withArgument(abuilder.withName(DICTIONARY_TYPE_OPTION).create())
            .withDescription("The dictionary file type (text|sequencefile), default is text")
            .withShortName("dt").create();

    Option numWordsOpt = obuilder.withLongName(NUM_WORDS_OPTION)
            .withArgument(abuilder.withName(NUM_WORDS_OPTION).create())
            .withDescription("The number of top terms to print").withShortName("n").create();

    Option evaluateOpt = obuilder.withLongName(EVALUATE_CLUSTERS)
            .withArgument(abuilder.withName(EVALUATE_CLUSTERS).create())
            .withDescription("Run ClusterEvaluator and CDbwEvaluator over the input.  "
                    + "The output will be appended to the rest of the output at the end. Default is false.")
            .withShortName("e").create();

    Option distanceMeasureOpt = obuilder.withLongName("distanceMeasure")
            .withArgument(abuilder.withName("distanceMeasure").create())
            .withDescription("k-means distance measure class name").withShortName("dm").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(outputDirOpt)
            .withOption(outputFormatOpt).withOption(substringOpt).withOption(pointsDirOpt)
            .withOption(samplePointsOpt).withOption(dictionaryOpt).withOption(dictionaryTypeOpt)
            .withOption(numWordsOpt).withOption(evaluateOpt).withOption(distanceMeasureOpt).withOption(helpOpt)
            .create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        // The same input location is exposed three ways (two Paths and a File).
        seqFileDir = getInputPath();
        inputPath = getInputPath();
        inputFile = getInputFile();
        if (cmdLine.hasOption(inputDirOpt)) {
            seqFileDir = new Path(cmdLine.getValue(inputDirOpt).toString());
            inputPath = new Path(cmdLine.getValue(inputDirOpt).toString());
            inputFile = new File(cmdLine.getValue(inputDirOpt).toString());
        }
        log.info("seqFileDir value: {}", seqFileDir);
        log.info("inputPath value: {}", inputPath);
        log.info("inputFile value: {}", inputFile);

        outputPath = getOutputPath();
        outputFile = getOutputFile();
        if (cmdLine.hasOption(outputDirOpt)) {
            outputPath = new Path(cmdLine.getValue(outputDirOpt).toString());
            outputFile = new File(cmdLine.getValue(outputDirOpt).toString());
        }
        log.info("outputPath value: {}", outputPath);
        log.info("outputFile value: {}", outputFile);

        if (cmdLine.hasOption(pointsDirOpt)) {
            pointsDir = new Path(cmdLine.getValue(pointsDirOpt).toString());
        }
        log.info("pointsDir value: {}", pointsDir);

        if (cmdLine.hasOption(substringOpt)) {
            // Negative values are ignored and the current subString setting is kept.
            int sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
            if (sub >= 0) {
                subString = sub;
            }
        }
        log.info("subString value: {}", subString);

        // Both dictionary options are optional, so their values are read only when supplied.
        // Reading them unconditionally dereferenced a null value (NullPointerException)
        // whenever the option was omitted; the fields now keep their current values instead.
        if (cmdLine.hasOption(dictionaryOpt)) {
            termDictionary = cmdLine.getValue(dictionaryOpt).toString();
        }
        if (cmdLine.hasOption(dictionaryTypeOpt)) {
            dictionaryFormat = cmdLine.getValue(dictionaryTypeOpt).toString();
        }
        log.info("termDictionary value: {}", termDictionary);
        log.info("dictionaryFormat value: {}", dictionaryFormat);

        if (cmdLine.hasOption(numWordsOpt)) {
            numTopFeatures = Integer.parseInt(cmdLine.getValue(numWordsOpt).toString());
        }
        log.info("numTopFeatures value: {}", numTopFeatures);

        outputFormat = OUTPUT_FORMAT.TEXT;
        if (cmdLine.hasOption(outputFormatOpt)) {
            outputFormat = OUTPUT_FORMAT.valueOf(cmdLine.getValue(outputFormatOpt).toString());
        }
        log.info("outputFormat value: {}", outputFormat);

        if (cmdLine.hasOption(samplePointsOpt)) {
            maxPointsPerCluster = Long.parseLong(cmdLine.getValue(samplePointsOpt).toString());
        } else {
            maxPointsPerCluster = Long.MAX_VALUE;
        }
        log.info("maxPointsPerCluster value: {}", maxPointsPerCluster);

        runEvaluation = cmdLine.hasOption(evaluateOpt);
        log.info("runEvaluation value: {}", runEvaluation);

        String distanceMeasureClass = null;
        if (cmdLine.hasOption(distanceMeasureOpt)) {
            distanceMeasureClass = cmdLine.getValue(distanceMeasureOpt).toString();
        }
        if (distanceMeasureClass != null) {
            measure = ClassUtils.instantiateAs(distanceMeasureClass, DistanceMeasure.class);
        }
        log.info("distanceMeasureClass value: {}", distanceMeasureClass);

    } catch (OptionException e) {
        CommandLineUtil.printHelp(group);
        log.error("parse para error", e);
        // A parse failure must not be reported as success: the fields above may be only
        // partially populated, so tell the caller not to proceed.
        return false;
    }
    return true;
}