List of usage examples for the org.apache.commons.cli2.commandline.Parser.parse method
public CommandLine parse(final String[] arguments) throws OptionException
From source file:org.apache.mahout.utils.vectors.lucene.Driver.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("dir").withRequired(true) .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create()) .withDescription("The Lucene directory").withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("o").create(); Option fieldOpt = obuilder.withLongName("field").withRequired(true) .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create()) .withDescription("The field in the index").withShortName("f").create(); Option idFieldOpt = obuilder.withLongName("idField").withRequired(false) .withArgument(abuilder.withName("idField").withMinimum(1).withMaximum(1).create()) .withDescription(/*from w w w. ja v a 2 s. c om*/ "The field in the index containing the index. If null, then the Lucene internal doc " + "id is used which is prone to error if the underlying index changes") .create(); Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true) .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary").withShortName("t").create(); Option seqDictOutOpt = obuilder.withLongName("seqDictOut").withRequired(false) .withArgument(abuilder.withName("seqDictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary as sequence file").withShortName("st").create(); Option weightOpt = obuilder.withLongName("weight").withRequired(false) .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create()) .withDescription("The kind of weight to use. 
Currently TF or TFIDF").withShortName("w").create(); Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false) .withArgument(abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create()) .withDescription("The delimiter for outputting the dictionary").withShortName("l").create(); Option powerOpt = obuilder.withLongName("norm").withRequired(false) .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create()) .withDescription( "The norm to use, expressed as either a double or \"INF\" if you want to use the Infinite norm. " + "Must be greater or equal to 0. The default is not to normalize") .withShortName("n").create(); Option maxOpt = obuilder.withLongName("max").withRequired(false) .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of vectors to output. If not specified, then it will loop over all docs") .withShortName("m").create(); Option minDFOpt = obuilder.withLongName("minDF").withRequired(false) .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create()) .withDescription("The minimum document frequency. Default is 1").withShortName("md").create(); Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false) .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs for the DF. Can be used to remove really high frequency terms." + " Expressed as an integer between 0 and 100. Default is 99.") .withShortName("x").create(); Option maxPercentErrorDocsOpt = obuilder.withLongName("maxPercentErrorDocs").withRequired(false) .withArgument(abuilder.withName("maxPercentErrorDocs").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs that can have a null term vector. These are noise document and can occur if the " + "analyzer used strips out all terms in the target field. 
This percentage is expressed as a value " + "between 0 and 1. The default is 0.") .withShortName("err").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(idFieldOpt).withOption(outputOpt) .withOption(delimiterOpt).withOption(helpOpt).withOption(fieldOpt).withOption(maxOpt) .withOption(dictOutOpt).withOption(seqDictOutOpt).withOption(powerOpt).withOption(maxDFPercentOpt) .withOption(weightOpt).withOption(minDFOpt).withOption(maxPercentErrorDocsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } if (cmdLine.hasOption(inputOpt)) { // Lucene case Driver luceneDriver = new Driver(); luceneDriver.setLuceneDir(cmdLine.getValue(inputOpt).toString()); if (cmdLine.hasOption(maxOpt)) { luceneDriver.setMaxDocs(Long.parseLong(cmdLine.getValue(maxOpt).toString())); } if (cmdLine.hasOption(weightOpt)) { luceneDriver.setWeightType(cmdLine.getValue(weightOpt).toString()); } luceneDriver.setField(cmdLine.getValue(fieldOpt).toString()); if (cmdLine.hasOption(minDFOpt)) { luceneDriver.setMinDf(Integer.parseInt(cmdLine.getValue(minDFOpt).toString())); } if (cmdLine.hasOption(maxDFPercentOpt)) { luceneDriver.setMaxDFPercent(Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString())); } if (cmdLine.hasOption(powerOpt)) { String power = cmdLine.getValue(powerOpt).toString(); if ("INF".equals(power)) { luceneDriver.setNorm(Double.POSITIVE_INFINITY); } else { luceneDriver.setNorm(Double.parseDouble(power)); } } if (cmdLine.hasOption(idFieldOpt)) { luceneDriver.setIdField(cmdLine.getValue(idFieldOpt).toString()); } if (cmdLine.hasOption(maxPercentErrorDocsOpt)) { luceneDriver.setMaxPercentErrorDocs( Double.parseDouble(cmdLine.getValue(maxPercentErrorDocsOpt).toString())); } 
luceneDriver.setOutFile(cmdLine.getValue(outputOpt).toString()); luceneDriver.setDelimiter( cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString() : "\t"); luceneDriver.setDictOut(cmdLine.getValue(dictOutOpt).toString()); if (cmdLine.hasOption(seqDictOutOpt)) { luceneDriver.setSeqDictOut(cmdLine.getValue(seqDictOutOpt).toString()); } luceneDriver.dumpVectors(); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.utils.vectors.lucene.SeqFilePrint.java
/**
 * Command-line entry point: parses the {@code inputFile} and {@code outFile} options, hands
 * them to a new {@link SeqFilePrint} and runs it.
 *
 * <p>Any exception thrown by {@code run} is logged at SEVERE level rather than propagated.
 *
 * @param args raw command-line arguments (also forwarded unchanged to {@code run})
 * @throws OptionException if the arguments cannot be parsed
 */
public static void main(String[] args) throws OptionException {
  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
  ArgumentBuilder argBuilder = new ArgumentBuilder();
  GroupBuilder groupBuilder = new GroupBuilder();

  Option sourceFileOpt = optionBuilder.withLongName("inputFile").withRequired(true)
      .withArgument(argBuilder.withName("inputFile").withMinimum(1).withMaximum(1).create())
      .withDescription("The output of the dictionary as sequence file")
      .withShortName("inputFile")
      .create();

  Option targetFileOpt = optionBuilder.withLongName("outFile").withRequired(true)
      .withArgument(argBuilder.withName("outfolder").withMinimum(1).withMaximum(1).create())
      .withDescription("The output of the dictionary as sequence file")
      .withShortName("outFile")
      .create();

  Group allOptions = groupBuilder.withName("Options")
      .withOption(sourceFileOpt)
      .withOption(targetFileOpt)
      .create();

  SeqFilePrint printer = new SeqFilePrint();

  Parser cliParser = new Parser();
  cliParser.setGroup(allOptions);
  CommandLine parsed = cliParser.parse(args);

  if (parsed.hasOption(sourceFileOpt)) {
    String sourceFile = parsed.getValue(sourceFileOpt).toString();
    printer.setInputSeqFile(sourceFile);
  }
  if (parsed.hasOption(targetFileOpt)) {
    String targetFile = parsed.getValue(targetFileOpt).toString();
    printer.setOutFile(targetFile);
  }

  try {
    printer.run(args);
  } catch (Exception ex) {
    Logger.getLogger(SeqFilePrint.class.getName()).log(Level.SEVERE, null, ex);
  }
}
From source file:org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles.java
@Override public int run(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputDirOpt = DefaultOptionCreator.inputOption().create(); Option outputDirOpt = DefaultOptionCreator.outputOption().create(); Option minSupportOpt = obuilder.withLongName("minSupport") .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create(); Option analyzerNameOpt = obuilder.withLongName("analyzerName") .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()) .withDescription("The class name of the analyzer").withShortName("a").create(); Option chunkSizeOpt = obuilder.withLongName("chunkSize") .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()) .withDescription("The chunkSize in MegaBytes. Default Value: 100MB").withShortName("chunk") .create();/* ww w . j av a 2s . c o m*/ Option weightOpt = obuilder.withLongName("weight").withRequired(false) .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create()) .withDescription("The kind of weight to use. Currently TF or TFIDF. Default: TFIDF") .withShortName("wt").create(); Option minDFOpt = obuilder.withLongName("minDF").withRequired(false) .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create()) .withDescription("The minimum document frequency. Default is 1").withShortName("md").create(); Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false) .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs for the DF. Can be used to remove really high frequency terms." + " Expressed as an integer between 0 and 100. Default is 99. 
If maxDFSigma is also set, " + "it will override this value.") .withShortName("x").create(); Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false) .withArgument(abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create()) .withDescription( "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) " + "of the document frequencies of these vectors. Can be used to remove really high frequency terms." + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less " + "than 0 no vectors will be filtered out. Default is -1.0. Overrides maxDFPercent") .withShortName("xs").create(); Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false) .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional)The minimum Log Likelihood Ratio(Float) Default is " + LLRReducer.DEFAULT_MIN_LLR) .withShortName("ml").create(); Option numReduceTasksOpt = obuilder.withLongName("numReducers") .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr") .create(); Option powerOpt = obuilder.withLongName("norm").withRequired(false) .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create()) .withDescription( "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm. " + "Must be greater or equal to 0. The default is not to normalize") .withShortName("n").create(); Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false) .withDescription("(Optional) Whether output vectors should be logNormalize. 
If set true else false") .withShortName("lnorm").create(); Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false) .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) The maximum size of ngrams to create" + " (2 = bigrams, 3 = trigrams, etc) Default Value:1") .withShortName("ng").create(); Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false) .withDescription( "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false") .withShortName("seq").create(); Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false) .withDescription("(Optional) Whether output vectors should be NamedVectors. If set true else false") .withShortName("nv").create(); Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false) .withDescription("If set, overwrite the output directory").withShortName("ow").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt) .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt) .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt) .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt) .withOption(overwriteOutput).withOption(helpOpt).withOption(sequentialAccessVectorOpt) .withOption(namedVectorOpt).withOption(logNormalizeOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return -1; } Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt)); Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt)); int 
chunkSize = 100; if (cmdLine.hasOption(chunkSizeOpt)) { chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt)); } int minSupport = 2; if (cmdLine.hasOption(minSupportOpt)) { String minSupportString = (String) cmdLine.getValue(minSupportOpt); minSupport = Integer.parseInt(minSupportString); } int maxNGramSize = 1; if (cmdLine.hasOption(maxNGramSizeOpt)) { try { maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString()); } catch (NumberFormatException ex) { log.warn("Could not parse ngram size option"); } } log.info("Maximum n-gram size is: {}", maxNGramSize); if (cmdLine.hasOption(overwriteOutput)) { HadoopUtil.delete(getConf(), outputDir); } float minLLRValue = LLRReducer.DEFAULT_MIN_LLR; if (cmdLine.hasOption(minLLROpt)) { minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString()); } log.info("Minimum LLR value: {}", minLLRValue); int reduceTasks = 1; if (cmdLine.hasOption(numReduceTasksOpt)) { reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString()); } log.info("Number of reduce tasks: {}", reduceTasks); Class<? 
extends Analyzer> analyzerClass = StandardAnalyzer.class; if (cmdLine.hasOption(analyzerNameOpt)) { String className = cmdLine.getValue(analyzerNameOpt).toString(); analyzerClass = Class.forName(className).asSubclass(Analyzer.class); // try instantiating it, b/c there isn't any point in setting it if // you can't instantiate it AnalyzerUtils.createAnalyzer(analyzerClass); } boolean processIdf; if (cmdLine.hasOption(weightOpt)) { String wString = cmdLine.getValue(weightOpt).toString(); if ("tf".equalsIgnoreCase(wString)) { processIdf = false; } else if ("tfidf".equalsIgnoreCase(wString)) { processIdf = true; } else { throw new OptionException(weightOpt); } } else { processIdf = true; } int minDf = 1; if (cmdLine.hasOption(minDFOpt)) { minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString()); } int maxDFPercent = 99; if (cmdLine.hasOption(maxDFPercentOpt)) { maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString()); } double maxDFSigma = -1.0; if (cmdLine.hasOption(maxDFSigmaOpt)) { maxDFSigma = Double.parseDouble(cmdLine.getValue(maxDFSigmaOpt).toString()); } float norm = PartialVectorMerger.NO_NORMALIZING; if (cmdLine.hasOption(powerOpt)) { String power = cmdLine.getValue(powerOpt).toString(); if ("INF".equals(power)) { norm = Float.POSITIVE_INFINITY; } else { norm = Float.parseFloat(power); } } boolean logNormalize = false; if (cmdLine.hasOption(logNormalizeOpt)) { logNormalize = true; } log.info("Tokenizing documents in {}", inputDir); Configuration conf = getConf(); Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER); //TODO: move this into DictionaryVectorizer , and then fold SparseVectorsFrom with EncodedVectorsFrom // to have one framework for all of this. 
DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath, conf); boolean sequentialAccessOutput = false; if (cmdLine.hasOption(sequentialAccessVectorOpt)) { sequentialAccessOutput = true; } boolean namedVectors = false; if (cmdLine.hasOption(namedVectorOpt)) { namedVectors = true; } boolean shouldPrune = maxDFSigma >= 0.0 || maxDFPercent > 0.00; String tfDirName = shouldPrune ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune" : DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER; log.info("Creating Term Frequency Vectors"); if (processIdf) { DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf, minSupport, maxNGramSize, minLLRValue, -1.0f, false, reduceTasks, chunkSize, sequentialAccessOutput, namedVectors); } else { DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf, minSupport, maxNGramSize, minLLRValue, norm, logNormalize, reduceTasks, chunkSize, sequentialAccessOutput, namedVectors); } Pair<Long[], List<Path>> docFrequenciesFeatures = null; // Should document frequency features be processed if (shouldPrune || processIdf) { log.info("Calculating IDF"); docFrequenciesFeatures = TFIDFConverter.calculateDF(new Path(outputDir, tfDirName), outputDir, conf, chunkSize); } long maxDF = maxDFPercent; //if we are pruning by std dev, then this will get changed if (shouldPrune) { long vectorCount = docFrequenciesFeatures.getFirst()[1]; if (maxDFSigma >= 0.0) { Path dfDir = new Path(outputDir, TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER); Path stdCalcDir = new Path(outputDir, HighDFWordsPruner.STD_CALC_DIR); // Calculate the standard deviation double stdDev = BasicStats.stdDevForGivenMean(dfDir, stdCalcDir, 0.0, conf); maxDF = (int) (100.0 * maxDFSigma * stdDev / vectorCount); } long maxDFThreshold = (long) (vectorCount * (maxDF / 100.0f)); // Prune the term frequency vectors Path tfDir = new Path(outputDir, tfDirName); Path prunedTFDir = new Path(outputDir, 
DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER); Path prunedPartialTFDir = new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-partial"); log.info("Pruning"); if (processIdf) { HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf, conf, docFrequenciesFeatures, -1.0f, false, reduceTasks); } else { HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf, conf, docFrequenciesFeatures, norm, logNormalize, reduceTasks); } HadoopUtil.delete(new Configuration(conf), tfDir); } if (processIdf) { TFIDFConverter.processTfIdf(new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER), outputDir, conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize, sequentialAccessOutput, namedVectors, reduceTasks); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } return 0; }
From source file:org.opencloudengine.flamingo.mapreduce.core.AbstractJob.java
/**
 * Parses the command-line arguments against the options registered on this job.
 *
 * <p>Before parsing, this method registers the help option plus the {@code tempDir},
 * {@code startPhase} (default {@code "0"}) and {@code endPhase} (default
 * {@code Integer.MAX_VALUE}) options. Help is printed and {@code null} is returned when
 * <ul>
 *   <li>the arguments cannot be parsed ({@link OptionException}),</li>
 *   <li>the help option (<tt>-h</tt>) is present, or</li>
 *   <li>{@code parseDirectories} rejects the command line with an
 *       {@link IllegalArgumentException}.</li>
 * </ul>
 *
 * <p>On success the {@code argMap} field is replaced by a fresh sorted map filled by
 * {@code maybePut}, and every entry is logged.
 *
 * @param args raw command-line arguments
 * @return the populated argument map, or {@code null} in the cases listed above.
 *     NOTE(review): the original (mis-encoded) comment indicates keys are option names
 *     prefixed with {@code "--"}; that is decided inside {@code maybePut} - confirm there.
 * @throws Exception declared by the original signature
 */
public Map<String, String> parseArguments(String[] args) throws Exception {
  Option helpOpt = addOption(DefaultOptionCreator.helpOption());
  addOption("tempDir", null, " ", false);
  addOption("startPhase", null, " ", "0");
  addOption("endPhase", null, " ", String.valueOf(Integer.MAX_VALUE));

  GroupBuilder groupBuilder = new GroupBuilder().withName("Hadoop MapReduce Job :");
  for (Option opt : options) {
    groupBuilder = groupBuilder.withOption(opt);
  }
  Group group = groupBuilder.create();

  CommandLine cmdLine;
  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    cmdLine = parser.parse(args);
  } catch (OptionException e) {
    log.error(e.getMessage());
    CommandLineUtil.printHelpWithGenericOptions(group, e);
    return null;
  }

  if (cmdLine.hasOption(helpOpt)) {
    CommandLineUtil.printHelpWithGenericOptions(group);
    return null;
  }

  try {
    parseDirectories(cmdLine);
  } catch (IllegalArgumentException e) {
    log.error(e.getMessage());
    CommandLineUtil.printHelpWithGenericOptions(group);
    return null;
  }

  argMap = new TreeMap<String, String>();
  maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));

  // The original message had no "{}" placeholder, so the map argument was silently dropped.
  log.info("Command line arguments: {}", argMap);
  // Walk the entries directly instead of looking each key up again.
  for (Map.Entry<String, String> entry : argMap.entrySet()) {
    log.info(" {} = {}", entry.getKey(), entry.getValue());
  }
  return argMap;
}
From source file:org.pharmgkb.util.CliHelper.java
/** * Parses arguments./*from ww w .j ava2 s . co m*/ */ public void parse(String[] args) throws OptionException { m_options = m_groupBuilder.create(); Parser parser = new Parser(); parser.setGroup(m_options); parser.setHelpOption(m_helpOption); m_commandLine = parser.parse(args); }
From source file:parse_wikipedia.ParseWikipedia.java
/**
 * Command-line entry point: parses the standard input/output directory options and passes
 * their values to {@code runJob}.
 *
 * <p>Parse failures and job failures ({@link OptionException}, {@link ClassNotFoundException},
 * {@link InterruptedException}) are logged rather than propagated. On interruption the
 * thread's interrupt status is restored before logging, so code further up the stack can
 * still observe it.
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the original signature; presumably raised by {@code runJob}
 */
public static void main(String[] args) throws IOException {
  GroupBuilder gbuilder = new GroupBuilder();

  Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
  Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

  Group group = gbuilder.withName("Options")
      .withOption(dirInputPathOpt)
      .withOption(dirOutputPathOpt)
      .create();

  Parser parser = new Parser();
  parser.setGroup(group);

  try {
    CommandLine cmdLine = parser.parse(args);
    String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
    String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);
    runJob(inputPath, outputPath);
  } catch (OptionException | ClassNotFoundException e) {
    log.error("Exception", e);
  } catch (InterruptedException e) {
    // Do not swallow the interruption: re-assert the flag before reporting.
    Thread.currentThread().interrupt();
    log.error("Exception", e);
  }
}
From source file:tk.summerway.mahout9.tools.MyClusterDumper.java
private boolean buildParse(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputDirOpt = DefaultOptionCreator.inputOption().create(); Option outputDirOpt = DefaultOptionCreator.outputOption().create(); Option outputFormatOpt = obuilder.withLongName(OUTPUT_FORMAT_OPT) .withArgument(abuilder.withName(OUTPUT_FORMAT_OPT).create()) .withDescription(/*ww w .j a v a2s . c om*/ "The optional output format for the results. Options: TEXT, CSV, JSON or GRAPH_ML. Default is TEXT") .withShortName("of").create(); Option substringOpt = obuilder.withLongName(SUBSTRING_OPTION) .withArgument(abuilder.withName(SUBSTRING_OPTION).create()) .withDescription("The number of chars of the asFormatString() to print").withShortName("b") .create(); Option pointsDirOpt = obuilder.withLongName(POINTS_DIR_OPTION) .withArgument(abuilder.withName(POINTS_DIR_OPTION).create()) .withDescription( "The directory containing points sequence files mapping input vectors to their cluster. " + "If specified, then the program will output the points associated with a cluster") .withShortName("p").create(); Option samplePointsOpt = obuilder.withLongName(SAMPLE_POINTS) .withArgument(abuilder.withName(SAMPLE_POINTS).create()) .withDescription("Specifies the maximum number of points to include _per_ cluster. 
The default " + "is to include all points") .withShortName("sp").create(); Option dictionaryOpt = obuilder.withLongName(DICTIONARY_OPTION) .withArgument(abuilder.withName(DICTIONARY_OPTION).create()).withDescription("The dictionary file") .withShortName("d").create(); Option dictionaryTypeOpt = obuilder.withLongName(DICTIONARY_TYPE_OPTION) .withArgument(abuilder.withName(DICTIONARY_TYPE_OPTION).create()) .withDescription("The dictionary file type (text|sequencefile), default is text") .withShortName("dt").create(); Option numWordsOpt = obuilder.withLongName(NUM_WORDS_OPTION) .withArgument(abuilder.withName(NUM_WORDS_OPTION).create()) .withDescription("The number of top terms to print").withShortName("n").create(); Option evaluateOpt = obuilder.withLongName(EVALUATE_CLUSTERS) .withArgument(abuilder.withName(EVALUATE_CLUSTERS).create()) .withDescription("Run ClusterEvaluator and CDbwEvaluator over the input. " + "The output will be appended to the rest of the output at the end. Default is false.") .withShortName("e").create(); Option distanceMeasureOpt = obuilder.withLongName("distanceMeasure") .withArgument(abuilder.withName("distanceMeasure").create()) .withDescription("k-means distance measure class name").withShortName("dm").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(outputDirOpt) .withOption(outputFormatOpt).withOption(substringOpt).withOption(pointsDirOpt) .withOption(samplePointsOpt).withOption(dictionaryOpt).withOption(dictionaryTypeOpt) .withOption(numWordsOpt).withOption(evaluateOpt).withOption(distanceMeasureOpt).withOption(helpOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } seqFileDir = getInputPath(); inputPath = 
getInputPath(); inputFile = getInputFile(); if (cmdLine.hasOption(inputDirOpt)) { seqFileDir = new Path(cmdLine.getValue(inputDirOpt).toString()); inputPath = new Path(cmdLine.getValue(inputDirOpt).toString()); inputFile = new File(cmdLine.getValue(inputDirOpt).toString()); } log.info("seqFileDir value: {}", seqFileDir); log.info("inputPath value: {}", inputPath); log.info("inputFile value: {}", inputFile); outputPath = getOutputPath(); outputFile = getOutputFile(); if (cmdLine.hasOption(outputDirOpt)) { outputPath = new Path(cmdLine.getValue(outputDirOpt).toString()); outputFile = new File(cmdLine.getValue(outputDirOpt).toString()); } log.info("outputPath value: {}", outputPath); log.info("outputFile value: {}", outputFile); if (cmdLine.hasOption(pointsDirOpt)) { pointsDir = new Path(cmdLine.getValue(pointsDirOpt).toString()); } log.info("pointsDir value: {}", pointsDir); if (cmdLine.hasOption(substringOpt)) { int sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString()); if (sub >= 0) { subString = sub; } } log.info("subString value: {}", subString); termDictionary = cmdLine.getValue(dictionaryOpt).toString(); dictionaryFormat = cmdLine.getValue(dictionaryTypeOpt).toString(); log.info("termDictionary value: {}", termDictionary); log.info("dictionaryFormat value: {}", dictionaryFormat); if (cmdLine.hasOption(numWordsOpt)) { numTopFeatures = Integer.parseInt(cmdLine.getValue(numWordsOpt).toString()); } log.info("numTopFeatures value: {}", numTopFeatures); outputFormat = OUTPUT_FORMAT.TEXT; if (cmdLine.hasOption(outputFormatOpt)) { outputFormat = OUTPUT_FORMAT.valueOf(cmdLine.getValue(outputFormatOpt).toString()); } log.info("outputFormat value: {}", outputFormat); if (cmdLine.hasOption(samplePointsOpt)) { maxPointsPerCluster = Long.parseLong(cmdLine.getValue(samplePointsOpt).toString()); } else { maxPointsPerCluster = Long.MAX_VALUE; } log.info("maxPointsPerCluster value: {}", maxPointsPerCluster); runEvaluation = cmdLine.hasOption(evaluateOpt); 
log.info("runEvaluation value: {}", runEvaluation); String distanceMeasureClass = null; if (cmdLine.hasOption(distanceMeasureOpt)) { distanceMeasureClass = cmdLine.getValue(distanceMeasureOpt).toString(); } if (distanceMeasureClass != null) { measure = ClassUtils.instantiateAs(distanceMeasureClass, DistanceMeasure.class); } log.info("distanceMeasureClass value: {}", distanceMeasureClass); } catch (OptionException e) { CommandLineUtil.printHelp(group); log.error("parse para error", e); } return true; }