Example usage for org.apache.commons.cli2.commandline Parser setHelpOption

Introduction

This page lists example usages of org.apache.commons.cli2.commandline.Parser#setHelpOption collected from open-source projects.

Prototype

public void setHelpOption(final Option helpOption) 

Document

Sets the help option to use with the simplified parsing.
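
Before the project examples, here is a minimal, self-contained sketch of the typical wiring (the class name SetHelpOptionSketch is ours for illustration; the CLI2 calls are the same ones used in the examples below). Once the help option is registered, the simplified parseAndHelp() prints usage and returns null when the option is present or the arguments are invalid:

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class SetHelpOptionSketch {
    public static void main(String[] args) {
        // Build a --help/-h option and a group containing it.
        Option helpOpt = new DefaultOptionBuilder().withLongName("help").withShortName("h")
                .withDescription("Print out help").create();
        Group group = new GroupBuilder().withName("Options").withOption(helpOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt); // the option parseAndHelp() treats as the help trigger

        // Prints usage and returns null if --help is given or parsing fails.
        CommandLine cmdLine = parser.parseAndHelp(args);
        if (cmdLine == null) {
            return;
        }
        // ... interrogate cmdLine with hasOption(...) / getValue(...) ...
    }
}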

Usage

From source file:org.apache.mahout.text.WikipediaToSequenceFile.java

/**
 * Takes in two arguments:
 * <ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
 * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
 * {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();

    Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

    Option categoriesOpt = obuilder.withLongName("categories")
            .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create())
            .withDescription("Location of the categories file.  One entry per line. "
                    + "Will be used to make a string match in Wikipedia Category field")
            .withShortName("c").create();

    Option exactMatchOpt = obuilder.withLongName("exactMatch")
            .withDescription("If set, then the category name must exactly match the "
                    + "entry in the categories file. Default is false")
            .withShortName("e").create();

    Option allOpt = obuilder.withLongName("all").withDescription("If set, Select all files. Default is false")
            .withShortName("all").create();

    Option removeLabelOpt = obuilder.withLongName("removeLabels")
            .withDescription("If set, remove [[Category:labels]] from document text after extracting label."
                    + "Default is false")
            .withShortName("rl").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
            .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(allOpt).withOption(helpOpt)
            .withOption(removeLabelOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    try {
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
        String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

        String catFile = "";
        if (cmdLine.hasOption(categoriesOpt)) {
            catFile = (String) cmdLine.getValue(categoriesOpt);
        }

        boolean all = false;
        if (cmdLine.hasOption(allOpt)) {
            all = true;
        }

        boolean removeLabels = false;
        if (cmdLine.hasOption(removeLabelOpt)) {
            removeLabels = true;
        }

        runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all, removeLabels);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (InterruptedException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (ClassNotFoundException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
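
Note that setHelpOption() only comes into play on the simplified parseAndHelp() path; because this example calls parse() directly, it still tests hasOption(helpOpt) and prints the help screen itself.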

From source file:org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles.java

@Override
public int run(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();

    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option minSupportOpt = obuilder.withLongName("minSupport")
            .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create();

    Option analyzerNameOpt = obuilder.withLongName("analyzerName")
            .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create())
            .withDescription("The class name of the analyzer").withShortName("a").create();

    Option chunkSizeOpt = obuilder.withLongName("chunkSize")
            .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in MegaBytes. Default Value: 100MB").withShortName("chunk")
            .create();

    Option weightOpt = obuilder.withLongName("weight").withRequired(false)
            .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
            .withDescription("The kind of weight to use. Currently TF or TFIDF. Default: TFIDF")
            .withShortName("wt").create();

    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
            .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
            .withDescription("The minimum document frequency.  Default is 1").withShortName("md").create();

    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
            .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The max percentage of docs for the DF.  Can be used to remove really high frequency terms."
                            + " Expressed as an integer between 0 and 100. Default is 99.  If maxDFSigma is also set, "
                            + "it will override this value.")
            .withShortName("x").create();

    Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false)
            .withArgument(abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
                            + "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
                            + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
                            + "than 0 no vectors will be filtered out. Default is -1.0.  Overrides maxDFPercent")
            .withShortName("xs").create();

    Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false)
            .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional)The minimum Log Likelihood Ratio(Float)  Default is "
                    + LLRReducer.DEFAULT_MIN_LLR)
            .withShortName("ml").create();

    Option numReduceTasksOpt = obuilder.withLongName("numReducers")
            .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr")
            .create();

    Option powerOpt = obuilder.withLongName("norm").withRequired(false)
            .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm.  "
                            + "Must be greater or equal to 0.  The default is not to normalize")
            .withShortName("n").create();

    Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be logNormalize. If set true else false")
            .withShortName("lnorm").create();

    Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false)
            .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) The maximum size of ngrams to create"
                    + " (2 = bigrams, 3 = trigrams, etc) Default Value:1")
            .withShortName("ng").create();

    Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false)
            .withDescription(
                    "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false")
            .withShortName("seq").create();

    Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be NamedVectors. If set true else false")
            .withShortName("nv").create();

    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false)
            .withDescription("If set, overwrite the output directory").withShortName("ow").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt)
            .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt)
            .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt)
            .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(sequentialAccessVectorOpt)
            .withOption(namedVectorOpt).withOption(logNormalizeOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt));
        Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt));

        int chunkSize = 100;
        if (cmdLine.hasOption(chunkSizeOpt)) {
            chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
        }
        int minSupport = 2;
        if (cmdLine.hasOption(minSupportOpt)) {
            String minSupportString = (String) cmdLine.getValue(minSupportOpt);
            minSupport = Integer.parseInt(minSupportString);
        }

        int maxNGramSize = 1;

        if (cmdLine.hasOption(maxNGramSizeOpt)) {
            try {
                maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
            } catch (NumberFormatException ex) {
                log.warn("Could not parse ngram size option");
            }
        }
        log.info("Maximum n-gram size is: {}", maxNGramSize);

        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.delete(getConf(), outputDir);
        }

        float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
        if (cmdLine.hasOption(minLLROpt)) {
            minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
        }
        log.info("Minimum LLR value: {}", minLLRValue);

        int reduceTasks = 1;
        if (cmdLine.hasOption(numReduceTasksOpt)) {
            reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        }
        log.info("Number of reduce tasks: {}", reduceTasks);

        Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
        if (cmdLine.hasOption(analyzerNameOpt)) {
            String className = cmdLine.getValue(analyzerNameOpt).toString();
            analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
            // try instantiating it, b/c there isn't any point in setting it if
            // you can't instantiate it
            AnalyzerUtils.createAnalyzer(analyzerClass);
        }

        boolean processIdf;

        if (cmdLine.hasOption(weightOpt)) {
            String wString = cmdLine.getValue(weightOpt).toString();
            if ("tf".equalsIgnoreCase(wString)) {
                processIdf = false;
            } else if ("tfidf".equalsIgnoreCase(wString)) {
                processIdf = true;
            } else {
                throw new OptionException(weightOpt);
            }
        } else {
            processIdf = true;
        }

        int minDf = 1;
        if (cmdLine.hasOption(minDFOpt)) {
            minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
        }
        int maxDFPercent = 99;
        if (cmdLine.hasOption(maxDFPercentOpt)) {
            maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
        }
        double maxDFSigma = -1.0;
        if (cmdLine.hasOption(maxDFSigmaOpt)) {
            maxDFSigma = Double.parseDouble(cmdLine.getValue(maxDFSigmaOpt).toString());
        }

        float norm = PartialVectorMerger.NO_NORMALIZING;
        if (cmdLine.hasOption(powerOpt)) {
            String power = cmdLine.getValue(powerOpt).toString();
            if ("INF".equals(power)) {
                norm = Float.POSITIVE_INFINITY;
            } else {
                norm = Float.parseFloat(power);
            }
        }

        boolean logNormalize = false;
        if (cmdLine.hasOption(logNormalizeOpt)) {
            logNormalize = true;
        }
        log.info("Tokenizing documents in {}", inputDir);
        Configuration conf = getConf();
        Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER);
        //TODO: move this into DictionaryVectorizer , and then fold SparseVectorsFrom with EncodedVectorsFrom
        // to have one framework for all of this.
        DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath, conf);

        boolean sequentialAccessOutput = false;
        if (cmdLine.hasOption(sequentialAccessVectorOpt)) {
            sequentialAccessOutput = true;
        }

        boolean namedVectors = false;
        if (cmdLine.hasOption(namedVectorOpt)) {
            namedVectors = true;
        }
        boolean shouldPrune = maxDFSigma >= 0.0 || maxDFPercent > 0.00;
        String tfDirName = shouldPrune ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune"
                : DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER;
        log.info("Creating Term Frequency Vectors");
        if (processIdf) {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, -1.0f, false, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        } else {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, norm, logNormalize, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        }

        Pair<Long[], List<Path>> docFrequenciesFeatures = null;
        // Should document frequency features be processed
        if (shouldPrune || processIdf) {
            log.info("Calculating IDF");
            docFrequenciesFeatures = TFIDFConverter.calculateDF(new Path(outputDir, tfDirName), outputDir, conf,
                    chunkSize);
        }

        long maxDF = maxDFPercent; //if we are pruning by std dev, then this will get changed
        if (shouldPrune) {
            long vectorCount = docFrequenciesFeatures.getFirst()[1];
            if (maxDFSigma >= 0.0) {
                Path dfDir = new Path(outputDir, TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER);
                Path stdCalcDir = new Path(outputDir, HighDFWordsPruner.STD_CALC_DIR);

                // Calculate the standard deviation
                double stdDev = BasicStats.stdDevForGivenMean(dfDir, stdCalcDir, 0.0, conf);
                maxDF = (int) (100.0 * maxDFSigma * stdDev / vectorCount);
            }

            long maxDFThreshold = (long) (vectorCount * (maxDF / 100.0f));

            // Prune the term frequency vectors
            Path tfDir = new Path(outputDir, tfDirName);
            Path prunedTFDir = new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER);
            Path prunedPartialTFDir = new Path(outputDir,
                    DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-partial");
            log.info("Pruning");
            if (processIdf) {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, -1.0f, false, reduceTasks);
            } else {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, norm, logNormalize, reduceTasks);
            }
            HadoopUtil.delete(new Configuration(conf), tfDir);
        }
        if (processIdf) {
            TFIDFConverter.processTfIdf(new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
                    outputDir, conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize,
                    sequentialAccessOutput, namedVectors, reduceTasks);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file:org.opencloudengine.flamingo.mapreduce.core.AbstractJob.java

/**
 * Parses the given command line arguments against the options registered via addOption.
 * Returns <tt>null</tt> if the <tt>-h</tt> help option is supplied or parsing fails.
 *
 * @param args the command line arguments
 * @return a {@code Map<String,String>} of the parsed options and their argument values.
 *         The keys are the option names prefixed with '--'; the presence of a flag can be
 *         tested with {@code containsKey}, and argument values retrieved with {@code get}.
 */
public Map<String, String> parseArguments(String[] args) throws Exception {
    Option helpOpt = addOption(DefaultOptionCreator.helpOption());
    addOption("tempDir", null, " ", false);
    addOption("startPhase", null, "  ", "0");
    addOption("endPhase", null, "  ", String.valueOf(Integer.MAX_VALUE));

    GroupBuilder groupBuilder = new GroupBuilder().withName("Hadoop MapReduce Job :");

    for (Option opt : options) {
        groupBuilder = groupBuilder.withOption(opt);
    }

    Group group = groupBuilder.create();

    CommandLine cmdLine;
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        cmdLine = parser.parse(args);
    } catch (OptionException e) {
        log.error(e.getMessage());
        CommandLineUtil.printHelpWithGenericOptions(group, e);
        return null;
    }

    if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelpWithGenericOptions(group);
        return null;
    }

    try {
        parseDirectories(cmdLine);
    } catch (IllegalArgumentException e) {
        log.error(e.getMessage());
        CommandLineUtil.printHelpWithGenericOptions(group);
        return null;
    }

    argMap = new TreeMap<String, String>();
    maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));
    log.info("Command line arguments: ", argMap);
    Set<String> keySet = argMap.keySet();
    for (Iterator<String> iterator = keySet.iterator(); iterator.hasNext();) {
        String key = iterator.next();
        log.info("   {} = {}", key, argMap.get(key));
    }
    return argMap;
}
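
A design note on this helper: returning null both when --help is requested and when the directory arguments are invalid gives callers a single signal to short-circuit the job, without duplicating the help-printing logic at each call site.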

From source file:org.pharmgkb.util.CliHelper.java

/**
 * Parses arguments.
 */
public void parse(String[] args) throws OptionException {

    m_options = m_groupBuilder.create();
    Parser parser = new Parser();
    parser.setGroup(m_options);
    parser.setHelpOption(m_helpOption);
    m_commandLine = parser.parse(args);
}

From source file:org.rvsnoop.ui.RvSnoopApplication.java

private CommandLine parseCommandLine(String[] args, Option helpOption, Option projectOption) {
    Group group = new GroupBuilder().withOption(helpOption).withOption(projectOption).create();
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOption);
    parser.setHelpFormatter(new HelpFormatter());
    CommandLine line = parser.parseAndHelp(args);
    // parseAndHelp() returns null once it has printed help, so check for
    // null before interrogating the command line.
    if (line == null || line.hasOption(helpOption)) {
        System.exit(0);
    }
    return line;
}
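
Unlike the parse() examples above, this one leans on the simplified path that setHelpOption() feeds: parseAndHelp() catches the OptionException internally, prints usage through the configured HelpFormatter, and returns null, so a null check stands in for the try/catch around parse().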

From source file:org.rzo.yajsw.WrapperExe.java

/**
 * Parses the command.
 * 
 * @param args
 *            the args
 */
private static void parseCommand(String[] args) {
    // configure a HelpFormatter
    HelpFormatter hf = new HelpFormatter();
    DefaultOptionBuilder oBuilder = new DefaultOptionBuilder();

    // configure a parser
    Parser p = new Parser();
    p.setGroup(group);
    p.setHelpFormatter(hf);
    p.setHelpOption(oBuilder.withLongName("help").withShortName("?").create());
    cl = p.parseAndHelp(args);

    // abort application if no CommandLine was parsed
    if (cl == null) {
        System.exit(-1);
    }
    cmds = cl.getOptions();
    try {
        confFile = (String) cl.getValue(CONF_FILE);
    } catch (Exception ex) {
        System.out.println("no wrapper config file found ");
    }
    try {
        defaultFile = (String) cl.getValue(cl.getOption("-d"));
        if (defaultFile != null)
            defaultFile = new File(defaultFile).getCanonicalPath();
    } catch (Exception ex) {
        // no defaults -> maybe ok
    }
    properties = cl.getValues(PROPERTIES);

}

From source file:tk.summerway.mahout9.tools.MyClusterDumper.java

private boolean buildParse(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();
    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option outputFormatOpt = obuilder.withLongName(OUTPUT_FORMAT_OPT)
            .withArgument(abuilder.withName(OUTPUT_FORMAT_OPT).create())
            .withDescription(
                    "The optional output format for the results. Options: TEXT, CSV, JSON or GRAPH_ML. Default is TEXT")
            .withShortName("of").create();

    Option substringOpt = obuilder.withLongName(SUBSTRING_OPTION)
            .withArgument(abuilder.withName(SUBSTRING_OPTION).create())
            .withDescription("The number of chars of the asFormatString() to print").withShortName("b")
            .create();

    Option pointsDirOpt = obuilder.withLongName(POINTS_DIR_OPTION)
            .withArgument(abuilder.withName(POINTS_DIR_OPTION).create())
            .withDescription(
                    "The directory containing points sequence files mapping input vectors to their cluster. "
                            + "If specified, then the program will output the points associated with a cluster")
            .withShortName("p").create();

    Option samplePointsOpt = obuilder.withLongName(SAMPLE_POINTS)
            .withArgument(abuilder.withName(SAMPLE_POINTS).create())
            .withDescription("Specifies the maximum number of points to include _per_ cluster.  The default "
                    + "is to include all points")
            .withShortName("sp").create();

    Option dictionaryOpt = obuilder.withLongName(DICTIONARY_OPTION)
            .withArgument(abuilder.withName(DICTIONARY_OPTION).create()).withDescription("The dictionary file")
            .withShortName("d").create();

    Option dictionaryTypeOpt = obuilder.withLongName(DICTIONARY_TYPE_OPTION)
            .withArgument(abuilder.withName(DICTIONARY_TYPE_OPTION).create())
            .withDescription("The dictionary file type (text|sequencefile), default is text")
            .withShortName("dt").create();

    Option numWordsOpt = obuilder.withLongName(NUM_WORDS_OPTION)
            .withArgument(abuilder.withName(NUM_WORDS_OPTION).create())
            .withDescription("The number of top terms to print").withShortName("n").create();

    Option evaluateOpt = obuilder.withLongName(EVALUATE_CLUSTERS)
            .withArgument(abuilder.withName(EVALUATE_CLUSTERS).create())
            .withDescription("Run ClusterEvaluator and CDbwEvaluator over the input.  "
                    + "The output will be appended to the rest of the output at the end. Default is false.")
            .withShortName("e").create();

    Option distanceMeasureOpt = obuilder.withLongName("distanceMeasure")
            .withArgument(abuilder.withName("distanceMeasure").create())
            .withDescription("k-means distance measure class name").withShortName("dm").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(outputDirOpt)
            .withOption(outputFormatOpt).withOption(substringOpt).withOption(pointsDirOpt)
            .withOption(samplePointsOpt).withOption(dictionaryOpt).withOption(dictionaryTypeOpt)
            .withOption(numWordsOpt).withOption(evaluateOpt).withOption(distanceMeasureOpt).withOption(helpOpt)
            .create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        seqFileDir = getInputPath();
        inputPath = getInputPath();
        inputFile = getInputFile();
        if (cmdLine.hasOption(inputDirOpt)) {
            seqFileDir = new Path(cmdLine.getValue(inputDirOpt).toString());
            inputPath = new Path(cmdLine.getValue(inputDirOpt).toString());
            inputFile = new File(cmdLine.getValue(inputDirOpt).toString());
        }
        log.info("seqFileDir value: {}", seqFileDir);
        log.info("inputPath value: {}", inputPath);
        log.info("inputFile value: {}", inputFile);

        outputPath = getOutputPath();
        outputFile = getOutputFile();
        if (cmdLine.hasOption(outputDirOpt)) {
            outputPath = new Path(cmdLine.getValue(outputDirOpt).toString());
            outputFile = new File(cmdLine.getValue(outputDirOpt).toString());
        }
        log.info("outputPath value: {}", outputPath);
        log.info("outputFile value: {}", outputFile);

        if (cmdLine.hasOption(pointsDirOpt)) {
            pointsDir = new Path(cmdLine.getValue(pointsDirOpt).toString());
        }
        log.info("pointsDir value: {}", pointsDir);

        if (cmdLine.hasOption(substringOpt)) {
            int sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
            if (sub >= 0) {
                subString = sub;
            }
        }
        log.info("subString value: {}", subString);

        // getValue() returns null when an option was not supplied, so guard both lookups.
        if (cmdLine.hasOption(dictionaryOpt)) {
            termDictionary = cmdLine.getValue(dictionaryOpt).toString();
        }
        if (cmdLine.hasOption(dictionaryTypeOpt)) {
            dictionaryFormat = cmdLine.getValue(dictionaryTypeOpt).toString();
        }
        log.info("termDictionary value: {}", termDictionary);
        log.info("dictionaryFormat value: {}", dictionaryFormat);

        if (cmdLine.hasOption(numWordsOpt)) {
            numTopFeatures = Integer.parseInt(cmdLine.getValue(numWordsOpt).toString());
        }
        log.info("numTopFeatures value: {}", numTopFeatures);

        outputFormat = OUTPUT_FORMAT.TEXT;
        if (cmdLine.hasOption(outputFormatOpt)) {
            outputFormat = OUTPUT_FORMAT.valueOf(cmdLine.getValue(outputFormatOpt).toString());
        }
        log.info("outputFormat value: {}", outputFormat);

        if (cmdLine.hasOption(samplePointsOpt)) {
            maxPointsPerCluster = Long.parseLong(cmdLine.getValue(samplePointsOpt).toString());
        } else {
            maxPointsPerCluster = Long.MAX_VALUE;
        }
        log.info("maxPointsPerCluster value: {}", maxPointsPerCluster);

        runEvaluation = cmdLine.hasOption(evaluateOpt);
        log.info("runEvaluation value: {}", runEvaluation);

        String distanceMeasureClass = null;
        if (cmdLine.hasOption(distanceMeasureOpt)) {
            distanceMeasureClass = cmdLine.getValue(distanceMeasureOpt).toString();
        }
        if (distanceMeasureClass != null) {
            measure = ClassUtils.instantiateAs(distanceMeasureClass, DistanceMeasure.class);
        }
        log.info("distanceMeasureClass value: {}", distanceMeasureClass);

    } catch (OptionException e) {
        CommandLineUtil.printHelp(group);
        log.error("parse para error", e);
    }
    return true;
}