Usage examples for the org.apache.commons.cli2.commandline.Parser constructor (Parser()).
From source file:org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputOption = DefaultOptionCreator.inputOption().create(); Option outputOption = DefaultOptionCreator.outputOption().create(); Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model") .withShortName("m") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create()) .withRequired(true).create(); Option likelihoodOption = optionBuilder.withLongName("likelihood") .withDescription("Compute likelihood of observed sequence").withShortName("l").withRequired(false) .create();/*w w w.j av a2 s . com*/ Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption) .withOption(modelOption).withOption(likelihoodOption).withName("Options").create(); try { Parser parser = new Parser(); parser.setGroup(optionGroup); CommandLine commandLine = parser.parse(args); String input = (String) commandLine.getValue(inputOption); String output = (String) commandLine.getValue(outputOption); String modelPath = (String) commandLine.getValue(modelOption); boolean computeLikelihood = commandLine.hasOption(likelihoodOption); //reading serialized HMM DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); HmmModel model; try { model = LossyHmmSerializer.deserialize(modelStream); } finally { Closeables.close(modelStream, true); } //reading observations List<Integer> observations = Lists.newArrayList(); Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8"); try { while (scanner.hasNextInt()) { observations.add(scanner.nextInt()); } } finally { scanner.close(); } int[] observationsArray = new int[observations.size()]; for (int i = 0; i < observations.size(); ++i) { observationsArray[i] = observations.get(i); } //decoding int[] hiddenStates = HmmEvaluator.decode(model, 
observationsArray, true); //writing output PrintWriter writer = new PrintWriter( new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true); try { for (int hiddenState : hiddenStates) { writer.print(hiddenState); writer.print(' '); } } finally { Closeables.close(writer, false); } if (computeLikelihood) { System.out.println("Likelihood: " + HmmEvaluator.modelLikelihood(model, observationsArray, true)); } } catch (OptionException e) { CommandLineUtil.printHelp(optionGroup); } }
From source file:org.apache.mahout.classifier.sgd.RunAdaptiveLogistic.java
/**
 * Parses command-line arguments into the static configuration fields
 * ({@code inputFile}, {@code modelFile}, {@code outputFile}, {@code idColumn},
 * {@code maxScoreOnly}).
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get the trained model").create();
    Option outputFileOption = builder.withLongName("output").withRequired(true)
        .withDescription("the file path to output scores")
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create()).create();
    Option idColumnOption = builder.withLongName("idcolumn").withRequired(true)
        .withDescription("the name of the id column for each record")
        .withArgument(argumentBuilder.withName("idcolumn").withMaximum(1).create()).create();
    Option maxScoreOnlyOption = builder.withLongName("maxscoreonly")
        .withDescription("only output the target label with max scores").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFileOption)
        .withOption(modelFileOption).withOption(outputFileOption).withOption(idColumnOption)
        .withOption(maxScoreOnlyOption).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    outputFile = getStringArgument(cmdLine, outputFileOption);
    idColumn = getStringArgument(cmdLine, idColumnOption);
    maxScoreOnly = getBooleanArgument(cmdLine, maxScoreOnlyOption);
    return true;
}
From source file:org.apache.mahout.classifier.sgd.RunLogistic.java
/**
 * Parses command-line arguments into the static configuration fields
 * ({@code inputFile}, {@code modelFile}, {@code showAuc}, {@code showScores},
 * {@code showConfusion}).
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();
    Option scores = builder.withLongName("scores").withDescription("print scores").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get a model").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores)
        .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);
    return true;
}
From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java
boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFileOption = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get training data").create(); Option modelFileOption = builder.withLongName("model").withRequired(true) .withArgument(argumentBuilder.withName("model").withMaximum(1).create()) .withDescription("where to get a model").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption) .withOption(modelFileOption).create(); Parser parser = new Parser(); parser.setHelpOption(help);// w w w .j ava 2 s.co m parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } inputFile = (String) cmdLine.getValue(inputFileOption); modelFile = (String) cmdLine.getValue(modelFileOption); return true; }
From source file:org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic.java
/**
 * Parses command-line arguments and populates the static
 * {@code AdaptiveLogisticModelParameters} ({@code lmp}) plus the static fields
 * {@code inputFile}, {@code outputFile}, {@code showperf}, {@code skipperfnum},
 * and {@code passes}.
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option showperf = builder.withLongName("showperf")
        .withDescription("output performance measures during training").create();
    Option inputFile = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option outputFile = builder.withLongName("output").withRequired(true)
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
        .withDescription("where to write the model content").create();
    Option threads = builder.withLongName("threads")
        .withArgument(argumentBuilder.withName("threads").withDefault("4").create())
        .withDescription("the number of threads AdaptiveLogisticRegression uses").create();
    Option predictors = builder.withLongName("predictors").withRequired(true)
        .withArgument(argumentBuilder.withName("predictors").create())
        .withDescription("a list of predictor variables").create();
    Option types = builder.withLongName("types").withRequired(true)
        .withArgument(argumentBuilder.withName("types").create())
        .withDescription("a list of predictor variable types (numeric, word, or text)").create();
    Option target = builder.withLongName("target").withDescription("the name of the target variable")
        .withRequired(true).withArgument(argumentBuilder.withName("target").withMaximum(1).create())
        .create();
    Option targetCategories = builder.withLongName("categories")
        .withDescription("the number of target categories to be considered").withRequired(true)
        .withArgument(argumentBuilder.withName("categories").withMaximum(1).create()).create();
    Option features = builder.withLongName("features")
        .withDescription("the number of internal hashed features to use")
        .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
        .create();
    Option passes = builder.withLongName("passes")
        .withDescription("the number of times to pass over the input data")
        .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create()).create();
    Option interval = builder.withLongName("interval")
        .withArgument(argumentBuilder.withName("interval").withDefault("500").create())
        .withDescription("the interval property of AdaptiveLogisticRegression").create();
    Option window = builder.withLongName("window")
        .withArgument(argumentBuilder.withName("window").withDefault("800").create())
        // NOTE(review): fixed help-text typo "propery" -> "property".
        .withDescription("the average property of AdaptiveLogisticRegression").create();
    Option skipperfnum = builder.withLongName("skipperfnum")
        .withArgument(argumentBuilder.withName("skipperfnum").withDefault("99").create())
        .withDescription("show performance measures every (skipperfnum + 1) rows").create();
    Option prior = builder.withLongName("prior")
        .withArgument(argumentBuilder.withName("prior").withDefault("L1").create())
        .withDescription("the prior algorithm to use: L1, L2, ebp, tp, up").create();
    Option priorOption = builder.withLongName("prioroption")
        .withArgument(argumentBuilder.withName("prioroption").create())
        .withDescription("constructor parameter for ElasticBandPrior and TPrior").create();
    Option auc = builder.withLongName("auc")
        .withArgument(argumentBuilder.withName("auc").withDefault("global").create())
        .withDescription("the auc to use: global or grouped").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
        .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
        .withOption(types).withOption(passes).withOption(interval).withOption(window).withOption(threads)
        .withOption(prior).withOption(features).withOption(showperf).withOption(skipperfnum)
        .withOption(priorOption).withOption(auc).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    TrainAdaptiveLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainAdaptiveLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    // Predictor names and their types are parallel lists.
    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }
    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }
    lmp = new AdaptiveLogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setInterval(getIntegerArgument(cmdLine, interval));
    lmp.setAverageWindow(getIntegerArgument(cmdLine, window));
    lmp.setThreads(getIntegerArgument(cmdLine, threads));
    lmp.setAuc(getStringArgument(cmdLine, auc));
    lmp.setPrior(getStringArgument(cmdLine, prior));
    // --prioroption has no default, so it may legitimately be absent.
    if (cmdLine.getValue(priorOption) != null) {
        lmp.setPriorOption(getDoubleArgument(cmdLine, priorOption));
    }
    lmp.setTypeMap(predictorList, typeList);
    TrainAdaptiveLogistic.showperf = getBooleanArgument(cmdLine, showperf);
    TrainAdaptiveLogistic.skipperfnum = getIntegerArgument(cmdLine, skipperfnum);
    TrainAdaptiveLogistic.passes = getIntegerArgument(cmdLine, passes);
    lmp.checkParameters();
    return true;
}
From source file:org.apache.mahout.classifier.sgd.TrainLogistic.java
private static boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create(); Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training") .create();//from w w w . ja va2 s.c o m ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFile = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get training data").create(); Option outputFile = builder.withLongName("output").withRequired(true) .withArgument(argumentBuilder.withName("output").withMaximum(1).create()) .withDescription("where to get training data").create(); Option predictors = builder.withLongName("predictors").withRequired(true) .withArgument(argumentBuilder.withName("p").create()) .withDescription("a list of predictor variables").create(); Option types = builder.withLongName("types").withRequired(true) .withArgument(argumentBuilder.withName("t").create()) .withDescription("a list of predictor variable types (numeric, word, or text)").create(); Option target = builder.withLongName("target").withRequired(true) .withArgument(argumentBuilder.withName("target").withMaximum(1).create()) .withDescription("the name of the target variable").create(); Option features = builder.withLongName("features") .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create()) .withDescription("the number of internal hashed features to use").create(); Option passes = builder.withLongName("passes") .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create()) .withDescription("the number of times to pass over the input data").create(); Option lambda = builder.withLongName("lambda") 
.withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create()) .withDescription("the amount of coefficient decay to use").create(); Option rate = builder.withLongName("rate") .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create()) .withDescription("the learning rate").create(); Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create(); Option targetCategories = builder.withLongName("categories").withRequired(true) .withArgument(argumentBuilder.withName("number").withMaximum(1).create()) .withDescription("the number of target categories to be considered").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile) .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors) .withOption(types).withOption(passes).withOption(lambda).withOption(rate).withOption(noBias) .withOption(features).create(); Parser parser = new Parser(); parser.setHelpOption(help); parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile); TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile); List<String> typeList = Lists.newArrayList(); for (Object x : cmdLine.getValues(types)) { typeList.add(x.toString()); } List<String> predictorList = Lists.newArrayList(); for (Object x : cmdLine.getValues(predictors)) { predictorList.add(x.toString()); } lmp = new LogisticModelParameters(); lmp.setTargetVariable(getStringArgument(cmdLine, target)); lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories)); lmp.setNumFeatures(getIntegerArgument(cmdLine, features)); lmp.setUseBias(!getBooleanArgument(cmdLine, noBias)); lmp.setTypeMap(predictorList, typeList); 
lmp.setLambda(getDoubleArgument(cmdLine, lambda)); lmp.setLearningRate(getDoubleArgument(cmdLine, rate)); TrainLogistic.scores = getBooleanArgument(cmdLine, scores); TrainLogistic.passes = getIntegerArgument(cmdLine, passes); return true; }
From source file:org.apache.mahout.classifier.sgd.ValidateAdaptiveLogistic.java
private static boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create(); Option auc = builder.withLongName("auc").withDescription("print AUC").create(); Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create(); Option scores = builder.withLongName("scores").withDescription("print scores").create(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFileOption = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get validate data").create(); Option modelFileOption = builder.withLongName("model").withRequired(true) .withArgument(argumentBuilder.withName("model").withMaximum(1).create()) .withDescription("where to get the trained model").create(); Option defaultCagetoryOption = builder.withLongName("defaultCategory").withRequired(false) .withArgument(//from w w w .ja va 2s . 
c o m argumentBuilder.withName("defaultCategory").withMaximum(1).withDefault("unknown").create()) .withDescription("the default category value to use").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores) .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption) .withOption(defaultCagetoryOption).create(); Parser parser = new Parser(); parser.setHelpOption(help); parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } inputFile = getStringArgument(cmdLine, inputFileOption); modelFile = getStringArgument(cmdLine, modelFileOption); defaultCategory = getStringArgument(cmdLine, defaultCagetoryOption); showAuc = getBooleanArgument(cmdLine, auc); showScores = getBooleanArgument(cmdLine, scores); showConfusion = getBooleanArgument(cmdLine, confusion); return true; }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080//from w ww . ja v a2 s .com log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option 
numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { 
log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelClassifierJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(), para.getModelFileName()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // args = new String [] {"-if","infile","-of","outfile","m", // "-nm","10","--nr","11"}; log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true) .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create()) .withDescription("Training data set file").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false) .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create()) .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create(); Option iterOpt = obuilder.withLongName("iter").withRequired(false) .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create()) .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create(); Option kOpt = obuilder.withLongName("k").withRequired(false) .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create()) .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create(); Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false) .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create()) .withDescription(/*from ww w .ja va2 s . 
c om*/ "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ") .withShortName("tsn").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option startingClassIndexOpt = obuilder.withLongName("startingClassIndex").withRequired(false) .withArgument(abuilder.withName("startingClassIndex").withMinimum(1).withMaximum(1).create()) .withDescription("The starting index of class (default = 0) or 1").withShortName("sci").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false) .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create()) .withDescription("0 -> Binary Classfication, 1 -> Regression, " + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ") .withShortName("s").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); // hadoop system setting. 
Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(outputFileOpt) .withOption(lambdaOpt).withOption(iterOpt).withOption(kOpt).withOption(svmTypeOpt) .withOption(classNumOpt).withOption(hdfsServerOpt).withOption(modelFileOpt) .withOption(startingClassIndexOpt).withOption(sampleNumOpt).withOption(mapSplitSizeOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTrainFile(cmdLine.getValue(trainFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // lambda if (cmdLine.hasOption(lambdaOpt)) { 
para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString())); } // iteration if (cmdLine.hasOption(iterOpt)) { para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString())); } // k if (cmdLine.hasOption(kOpt)) { para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString())); } // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // number of samples in training data set. if (cmdLine.hasOption(sampleNumOpt)) { para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString())); } if (cmdLine.hasOption(startingClassIndexOpt)) { para.setStartingClassIndex(Integer.parseInt(cmdLine.getValue(startingClassIndexOpt).toString())); } // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } // multi classification classificationType if (cmdLine.hasOption(svmTypeOpt)) { para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString())); } // MapReduce system setting. 
if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassifierTrainJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassifierTrainJob.setMapperParameters(job.getConfiguration(), para.getMaxIter(), para.getTrainSampleNumber(), para.getClassNum(), para.getClassificationType(), para.getStartingClassIndex()); ParallelMultiClassifierTrainJob.setReducerParameters(job.getConfiguration(), (float) para.getLambda(), para.getExamplesPerIter(), para.getModelFileName(), para.getHdfsServerAddr()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTrainFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassPredictionDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080// ww w .j a v a2 s . co m log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option modelFileOpt = obuilder.withLongName("modelFilePath").withRequired(true) .withArgument(abuilder.withName("modelFilePath").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model files Path (default = /user) ").withShortName("m").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); // system setup Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split 
size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(classNumOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(hdfsServerOpt).withOption(taskTimeoutOpt) .withOption(numberofReducersOpt).withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if 
(cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassPredictionJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassPredictionJob.setMapperParameters(job.getConfiguration(), para.getModelFileName(), para.getHdfsServerAddr(), para.getClassNum(), para.getClassificationType()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }