List of usage examples for org.apache.commons.cli2 CommandLine getValue
Object getValue(final Option option) throws IllegalStateException;
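getValue returns the parsed argument of the given option as an Object; every example below follows the same build-parse-read pattern. Here is a minimal sketch of that pattern, assuming the same commons-cli2 classes the examples use (the "input" option name is illustrative, not taken from any one example):

  // org.apache.commons.cli2: CommandLine, Group, Option, OptionException
  // org.apache.commons.cli2.builder: ArgumentBuilder, DefaultOptionBuilder, GroupBuilder
  // org.apache.commons.cli2.commandline: Parser
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
      .withDescription("Input path").create();
  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).create();
  Parser parser = new Parser();
  parser.setGroup(group);
  try {
    CommandLine cmdLine = parser.parse(args);
    // getValue returns Object; for a single String argument, toString() (or a cast) yields the text
    String input = cmdLine.getValue(inputOpt).toString();
  } catch (OptionException e) {
    // parse failure: print usage, e.g. CommandLineUtil.printHelp(group)
  }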
From source file:org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver.java
public static void main(String[] args) throws Exception {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option clustersOpt = DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create();
  Option kOpt = DefaultOptionCreator.kOption()
      .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.")
      .create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option mOpt = DefaultOptionCreator.mOption().create();
  Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
  Option numMapTasksOpt = DefaultOptionCreator.numMappersOption().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
  Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
  Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
      .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
      .withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt).withOption(overwriteOutput)
      .withOption(helpOpt).withOption(numMapTasksOpt).withOption(numReduceTasksOpt)
      // include clusteringOpt so the hasOption(clusteringOpt) check below can match
      .withOption(clusteringOpt)
      .withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
    if (cmdLine.hasOption(measureClassOpt)) {
      measureClass = cmdLine.getValue(measureClassOpt).toString();
    }
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
    int numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
    double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
    if (cmdLine.hasOption(kOpt)) {
      clusters = RandomSeedGenerator.buildRandom(input, clusters,
          Integer.parseInt(cmdLine.getValue(kOpt).toString()));
    }
    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
        numReduceTasks, m, cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
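As here, the returned Object is usually consumed by calling toString() on it; other examples below (ClusterQualitySummarizer, InMemoryCollapsedVariationalBayes0) cast it to String instead. For a single-valued textual argument the two approaches are equivalent.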
From source file:org.apache.mahout.clustering.kmeans.KMeansDriver.java
public static void main(String[] args) throws Exception {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option clustersOpt = DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create();
  Option kOpt = DefaultOptionCreator.kOption()
      .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.")
      .create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option numReduceTasksOpt = DefaultOptionCreator.numReducersOption().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(clustersOpt)
      .withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt)
      .withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
      .withOption(overwriteOutput).withOption(helpOpt).withOption(clusteringOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path clusters = new Path(cmdLine.getValue(clustersOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = cmdLine.getValue(measureClassOpt).toString();
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    if (cmdLine.hasOption(kOpt)) {
      clusters = RandomSeedGenerator.buildRandom(input, clusters,
          Integer.parseInt(cmdLine.getValue(kOpt).toString()));
    }
    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks,
        cmdLine.hasOption(clusteringOpt));
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java
public static int main2(String[] args, Configuration conf) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option helpOpt = DefaultOptionCreator.helpOption();
  Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
      .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
      .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
          + "one doc per line")
      .withShortName("i").create();
  Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
      .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
      .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();
  Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
      .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
      .withDescription("HDFS namenode URI").withShortName("dfs").create();
  Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
      .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of topics to learn").withShortName("top").create();
  Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
      .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
      .withDescription("File to write out p(term | topic)").withShortName("to").create();
  Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
      .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
      .withDescription("File to write out p(topic | docid)").withShortName("do").create();
  Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
      .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
      .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();
  Option etaOpt = obuilder.withLongName("eta").withRequired(false)
      .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
      .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();
  Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
      .withArgument(abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
      .withDescription("Maximum number of training passes").withShortName("m").create();
  Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
      .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
          .withDefault("0.0").create())
      .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
      .create();
  Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
      .withArgument(abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
      .withDescription("Minimum number of iterations").withShortName("b").create();
  Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
      .withArgument(abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
      .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c").create();
  Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
      .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no").create())
      .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt").create();
  Option numTrainThreadsOpt = obuilder.withLongName("numTrainThreads").withRequired(false)
      .withArgument(abuilder.withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
      .withDescription("number of threads to train with").withShortName("ntt").create();
  Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
      .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
      .withDescription("number of threads to update the model with").withShortName("nut").create();
  Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
      .withArgument(abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
      .withDescription("print verbose information, like top-terms in each topic, during iteration")
      .withShortName("v").create();

  Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
      .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
      .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
      .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
      .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
      .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    String inputDirString = (String) cmdLine.getValue(inputDirOpt);
    String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
    int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
    double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
    double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
    int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
    int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
    double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
    int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
    int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
    String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
    String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
    //String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
    boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
    double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

    long start = System.nanoTime();
    if (conf.get("fs.default.name") == null) {
      String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
      conf.set("fs.default.name", dfsNameNode);
    }
    String[] terms = loadDictionary(dictDirString, conf);
    logTime("dictionary loading", System.nanoTime() - start);
    start = System.nanoTime();
    Matrix corpus = loadVectors(inputDirString, conf);
    logTime("vector seqfile corpus loading", System.nanoTime() - start);
    start = System.nanoTime();
    InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
        numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
    logTime("cvb0 init", System.nanoTime() - start);

    start = System.nanoTime();
    cvb0.setVerbose(verbose);
    cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
    logTime("total training time", System.nanoTime() - start);

    /*
    if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
      cvb0.inferDocuments(0.0, 100, true);
    } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
      cvb0.inferDocuments(0.0, 100, false);
    }
    */

    start = System.nanoTime();
    cvb0.writeModel(new Path(topicOutFile));
    DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
    logTime("printTopics", System.nanoTime() - start);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
  }
  return 0;
}
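Note that this example reads defaulted options such as alphaOpt and etaOpt without a hasOption guard: because each argument was built with withDefault(...), getValue falls back to that default value when the option is absent from the command line.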
From source file:org.apache.mahout.clustering.lda.LDADriver.java
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option topicsOpt = DefaultOptionCreator.numTopicsOption().create();
  Option wordsOpt = DefaultOptionCreator.numWordsOption().create();
  Option topicSmOpt = DefaultOptionCreator.topicSmoothingOption().create();
  Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
  Option numReducOpt = DefaultOptionCreator.numReducersOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
      .withOption(topicsOpt).withOption(wordsOpt).withOption(topicSmOpt).withOption(maxIterOpt)
      .withOption(numReducOpt).withOption(overwriteOutput).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
    int numReduceTasks = Integer.parseInt(cmdLine.getValue(numReducOpt).toString());
    int numTopics = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
    int numWords = Integer.parseInt(cmdLine.getValue(wordsOpt).toString());
    // the smoothing value comes from its own option, topicSmOpt
    double topicSmoothing = Double.parseDouble(cmdLine.getValue(topicSmOpt).toString());
    if (topicSmoothing < 1) {
      topicSmoothing = 50.0 / numTopics;
    }
    runJob(input, output, numTopics, numWords, topicSmoothing, maxIterations, numReduceTasks);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option dictOpt = obuilder.withLongName("dict").withRequired(true)
      .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create())
      .withDescription("Dictionary to read in, in the same format as one created by "
          + "org.apache.mahout.utils.vectors.lucene.Driver")
      .withShortName("d").create();
  Option outOpt = DefaultOptionCreator.outputOption().create();
  Option wordOpt = obuilder.withLongName("words").withRequired(false)
      .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create())
      .withDescription("Number of words to print").withShortName("w").create();
  Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false)
      .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create())
      .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  // include helpOpt in the group so the hasOption(helpOpt) check below can match
  Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
      .withOption(inputOpt).withOption(dictTypeOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    String input = cmdLine.getValue(inputOpt).toString();
    String dictFile = cmdLine.getValue(dictOpt).toString();
    int numWords = 20;
    if (cmdLine.hasOption(wordOpt)) {
      numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
    }
    Configuration config = new Configuration();
    String dictionaryType = "text";
    if (cmdLine.hasOption(dictTypeOpt)) {
      dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
    }
    List<String> wordList;
    if ("text".equals(dictionaryType)) {
      wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
    } else if ("sequencefile".equals(dictionaryType)) {
      wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
    } else {
      throw new IllegalArgumentException("Invalid dictionary format");
    }
    List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);
    File output = null;
    if (cmdLine.hasOption(outOpt)) {
      output = new File(cmdLine.getValue(outOpt).toString());
      if (!output.exists() && !output.mkdirs()) {
        throw new IOException("Could not create directory: " + output);
      }
    }
    printTopWords(topWords, output);
  } catch (OptionException e) {
    CommandLineUtil.printHelp(group);
    throw e;
  }
}
From source file:org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver.java
public static void main(String[] args) throws IOException {
  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();
  Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
  Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
  Option inputIsCanopiesOpt = DefaultOptionCreator.inputIsCanopiesOption().create();
  Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
  Option threshold1Opt = DefaultOptionCreator.t1Option().create();
  Option threshold2Opt = DefaultOptionCreator.t2Option().create();
  Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

  Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
      .withOption(overwriteOutput).withOption(measureClassOpt).withOption(helpOpt)
      .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
      .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    Path input = new Path(cmdLine.getValue(inputOpt).toString());
    Path output = new Path(cmdLine.getValue(outputOpt).toString());
    String measureClass = cmdLine.getValue(measureClassOpt).toString();
    if (cmdLine.hasOption(overwriteOutput)) {
      HadoopUtil.overwriteOutput(output);
    }
    double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
    double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
    double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
    int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
    runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations,
        cmdLine.hasOption(inputIsCanopiesOpt), cmdLine.hasOption(clusteringOpt));
  } catch (OptionException e) {
    log.error("Exception parsing command line: ", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer.java
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();
  Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (training set)").create();
  Option testInputFileOption = builder.withLongName("testInput").withShortName("itest")
      .withArgument(argumentBuilder.withName("testInput").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (test set)").create();
  Option centroidsFileOption = builder.withLongName("centroids").withShortName("c").withRequired(true)
      .withArgument(argumentBuilder.withName("centroids").withMaximum(1).create())
      .withDescription("where to get seq files with the centroids (from Mahout KMeans or StreamingKMeansDriver)")
      .create();
  Option centroidsCompareFileOption = builder.withLongName("centroidsCompare").withShortName("cc")
      .withRequired(false)
      .withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
      .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
          + "StreamingKMeansDriver)")
      .create();
  Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("where to dump the CSV file with the results").create();
  Option mahoutKMeansFormatOption = builder.withLongName("mahoutkmeansformat").withShortName("mkm")
      .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();
  Option mahoutKMeansCompareFormatOption = builder.withLongName("mahoutkmeansformatCompare")
      .withShortName("mkmc").withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argumentBuilder.withName("numpoints").withMaximum(1).create()).create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
      .withOption(testInputFileOption).withOption(outputFileOption).withOption(centroidsFileOption)
      .withOption(centroidsCompareFileOption).withOption(mahoutKMeansFormatOption)
      .withOption(mahoutKMeansCompareFormatOption).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 150));

  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }
  trainFile = (String) cmdLine.getValue(inputFileOption);
  if (cmdLine.hasOption(testInputFileOption)) {
    testFile = (String) cmdLine.getValue(testInputFileOption);
  }
  centroidFile = (String) cmdLine.getValue(centroidsFileOption);
  if (cmdLine.hasOption(centroidsCompareFileOption)) {
    centroidCompareFile = (String) cmdLine.getValue(centroidsCompareFileOption);
  }
  outputFile = (String) cmdLine.getValue(outputFileOption);
  if (cmdLine.hasOption(mahoutKMeansFormatOption)) {
    mahoutKMeansFormat = true;
  }
  if (cmdLine.hasOption(mahoutKMeansCompareFormatOption)) {
    mahoutKMeansFormatCompare = true;
  }
  return true;
}
From source file:org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles.java
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();
  Option inputFileOption = builder.withLongName("input").withShortName("i").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription("the base folder for the sequence files (they must all have the same key/value type)")
      .create();
  Option outputFileOption = builder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("the base name for the output splits; the i'th split gets the suffix -i")
      .create();
  Option numSplitsOption = builder.withLongName("numSplits").withShortName("ns").withRequired(true)
      .withArgument(argumentBuilder.withName("numSplits").withMaximum(1).create())
      .withDescription("how many splits to use for the given files").create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
      .withOption(outputFileOption).withOption(numSplitsOption).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }
  inputFile = (String) cmdLine.getValue(inputFileOption);
  outputFileBase = (String) cmdLine.getValue(outputFileOption);
  numSplits = Integer.parseInt((String) cmdLine.getValue(numSplitsOption));
  return true;
}
From source file:org.apache.mahout.df.BreimanExample.java
@Override
public int run(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of trees to grow, each iteration").create();
  Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
      .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of times to repeat the test").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
      .withOption(nbtreesOpt).withOption(helpOpt).create();

  Path dataPath;
  Path datasetPath;
  int nbTrees;
  int nbIterations;

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }
    String dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
    nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());
    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  // load the data
  FileSystem fs = dataPath.getFileSystem(new Configuration());
  Dataset dataset = Dataset.load(getConf(), datasetPath);
  Data data = DataLoader.loadData(dataset, fs, dataPath);

  // take m to be the first integer less than log2(M) + 1, where M is the
  // number of inputs
  int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);

  Random rng = RandomUtils.getRandom();
  for (int iteration = 0; iteration < nbIterations; iteration++) {
    log.info("Iteration {}", iteration);
    runIteration(rng, data, m, nbTrees);
  }

  log.info("********************************************");
  log.info("Selection error : {}", sumTestErr / nbIterations);
  log.info("Single Input error : {}", sumOneErr / nbIterations);
  log.info("One Tree error : {}", sumTreeErr / nbIterations);
  log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
  log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
  log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
  log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

  return 0;
}
From source file:org.apache.mahout.df.mapred.BuildForest.java
@Override
public int run(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option oobOpt = obuilder.withShortName("oob").withRequired(false)
      .withDescription("Optional, estimate the out-of-bag error").create();
  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(true)
      .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of variables to select randomly at each tree-node").create();
  Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false)
      .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create())
      .withDescription("Optional, seed value used to initialise the Random number generator").create();
  Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false)
      .withDescription("Optional, use the Partial Data implementation").create();
  Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of trees to grow").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  Group group = gbuilder.withName("Options").withOption(oobOpt).withOption(dataOpt).withOption(datasetOpt)
      .withOption(selectionOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt)
      .withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }
    isPartial = cmdLine.hasOption(partialOpt);
    isOob = cmdLine.hasOption(oobOpt);
    String dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString());
    nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
    if (cmdLine.hasOption(seedOpt)) {
      seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
    }
    log.debug("data : {}", dataName);
    log.debug("dataset : {}", datasetName);
    log.debug("m : {}", m);
    log.debug("seed : {}", seed);
    log.debug("nbtrees : {}", nbTrees);
    log.debug("isPartial : {}", isPartial);
    log.debug("isOob : {}", isOob);
    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  buildForest();
  return 0;
}