Usage examples for the org.apache.commons.cli2.commandline.Parser constructor (Parser()).
From source file:org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputOption = DefaultOptionCreator.inputOption().create(); Option outputOption = DefaultOptionCreator.outputOption().create(); Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model") .withShortName("m") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create()) .withRequired(true).create(); Option likelihoodOption = optionBuilder.withLongName("likelihood") .withDescription("Compute likelihood of observed sequence").withShortName("l").withRequired(false) .create();/*w w w.j av a2 s . com*/ Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption) .withOption(modelOption).withOption(likelihoodOption).withName("Options").create(); try { Parser parser = new Parser(); parser.setGroup(optionGroup); CommandLine commandLine = parser.parse(args); String input = (String) commandLine.getValue(inputOption); String output = (String) commandLine.getValue(outputOption); String modelPath = (String) commandLine.getValue(modelOption); boolean computeLikelihood = commandLine.hasOption(likelihoodOption); //reading serialized HMM DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); HmmModel model; try { model = LossyHmmSerializer.deserialize(modelStream); } finally { Closeables.close(modelStream, true); } //reading observations List<Integer> observations = Lists.newArrayList(); Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8"); try { while (scanner.hasNextInt()) { observations.add(scanner.nextInt()); } } finally { scanner.close(); } int[] observationsArray = new int[observations.size()]; for (int i = 0; i < observations.size(); ++i) { observationsArray[i] = observations.get(i); } //decoding int[] hiddenStates = HmmEvaluator.decode(model, 
observationsArray, true); //writing output PrintWriter writer = new PrintWriter( new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true); try { for (int hiddenState : hiddenStates) { writer.print(hiddenState); writer.print(' '); } } finally { Closeables.close(writer, false); } if (computeLikelihood) { System.out.println("Likelihood: " + HmmEvaluator.modelLikelihood(model, observationsArray, true)); } } catch (OptionException e) { CommandLineUtil.printHelp(optionGroup); } }
From source file:org.apache.mahout.classifier.sgd.RunAdaptiveLogistic.java
/**
 * Parses command-line arguments into the static configuration fields
 * ({@code inputFile}, {@code modelFile}, {@code outputFile}, {@code idColumn},
 * {@code maxScoreOnly}).
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get the trained model").create();
    Option outputFileOption = builder.withLongName("output").withRequired(true)
        .withDescription("the file path to output scores")
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create()).create();
    Option idColumnOption = builder.withLongName("idcolumn").withRequired(true)
        .withDescription("the name of the id column for each record")
        .withArgument(argumentBuilder.withName("idcolumn").withMaximum(1).create()).create();
    Option maxScoreOnlyOption = builder.withLongName("maxscoreonly")
        .withDescription("only output the target label with max scores").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFileOption)
        .withOption(modelFileOption).withOption(outputFileOption).withOption(idColumnOption)
        .withOption(maxScoreOnlyOption).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    outputFile = getStringArgument(cmdLine, outputFileOption);
    idColumn = getStringArgument(cmdLine, idColumnOption);
    maxScoreOnly = getBooleanArgument(cmdLine, maxScoreOnlyOption);
    return true;
}
From source file:org.apache.mahout.classifier.sgd.RunLogistic.java
/**
 * Parses command-line arguments into the static configuration fields
 * ({@code inputFile}, {@code modelFile}, {@code showAuc}, {@code showScores},
 * {@code showConfusion}).
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();
    Option scores = builder.withLongName("scores").withDescription("print scores").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get a model").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores)
        .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);
    return true;
}
From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java
boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFileOption = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get training data").create(); Option modelFileOption = builder.withLongName("model").withRequired(true) .withArgument(argumentBuilder.withName("model").withMaximum(1).create()) .withDescription("where to get a model").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption) .withOption(modelFileOption).create(); Parser parser = new Parser(); parser.setHelpOption(help);// w w w .j ava 2 s.co m parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } inputFile = (String) cmdLine.getValue(inputFileOption); modelFile = (String) cmdLine.getValue(modelFileOption); return true; }
From source file:org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic.java
/**
 * Parses command-line arguments and populates the static
 * {@code AdaptiveLogisticModelParameters} ({@code lmp}) plus the static fields
 * {@code inputFile}, {@code outputFile}, {@code showperf}, {@code skipperfnum},
 * and {@code passes}.
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded; false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();
    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option showperf = builder.withLongName("showperf")
        .withDescription("output performance measures during training").create();
    Option inputFile = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option outputFile = builder.withLongName("output").withRequired(true)
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
        .withDescription("where to write the model content").create();
    Option threads = builder.withLongName("threads")
        .withArgument(argumentBuilder.withName("threads").withDefault("4").create())
        .withDescription("the number of threads AdaptiveLogisticRegression uses").create();
    Option predictors = builder.withLongName("predictors").withRequired(true)
        .withArgument(argumentBuilder.withName("predictors").create())
        .withDescription("a list of predictor variables").create();
    Option types = builder.withLongName("types").withRequired(true)
        .withArgument(argumentBuilder.withName("types").create())
        .withDescription("a list of predictor variable types (numeric, word, or text)").create();
    Option target = builder.withLongName("target").withDescription("the name of the target variable")
        .withRequired(true).withArgument(argumentBuilder.withName("target").withMaximum(1).create())
        .create();
    Option targetCategories = builder.withLongName("categories")
        .withDescription("the number of target categories to be considered").withRequired(true)
        .withArgument(argumentBuilder.withName("categories").withMaximum(1).create()).create();
    Option features = builder.withLongName("features")
        .withDescription("the number of internal hashed features to use")
        .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
        .create();
    Option passes = builder.withLongName("passes")
        .withDescription("the number of times to pass over the input data")
        .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create()).create();
    Option interval = builder.withLongName("interval")
        .withArgument(argumentBuilder.withName("interval").withDefault("500").create())
        .withDescription("the interval property of AdaptiveLogisticRegression").create();
    Option window = builder.withLongName("window")
        .withArgument(argumentBuilder.withName("window").withDefault("800").create())
        // NOTE(review): fixed help-text typo "propery" -> "property".
        .withDescription("the average property of AdaptiveLogisticRegression").create();
    Option skipperfnum = builder.withLongName("skipperfnum")
        .withArgument(argumentBuilder.withName("skipperfnum").withDefault("99").create())
        .withDescription("show performance measures every (skipperfnum + 1) rows").create();
    Option prior = builder.withLongName("prior")
        .withArgument(argumentBuilder.withName("prior").withDefault("L1").create())
        .withDescription("the prior algorithm to use: L1, L2, ebp, tp, up").create();
    Option priorOption = builder.withLongName("prioroption")
        .withArgument(argumentBuilder.withName("prioroption").create())
        .withDescription("constructor parameter for ElasticBandPrior and TPrior").create();
    Option auc = builder.withLongName("auc")
        .withArgument(argumentBuilder.withName("auc").withDefault("global").create())
        .withDescription("the auc to use: global or grouped").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
        .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
        .withOption(types).withOption(passes).withOption(interval).withOption(window).withOption(threads)
        .withOption(prior).withOption(features).withOption(showperf).withOption(skipperfnum)
        .withOption(priorOption).withOption(auc).create();
    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    // parseAndHelp returns null when help was printed or parsing failed.
    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        return false;
    }
    TrainAdaptiveLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainAdaptiveLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    // Predictor names and their types are parallel lists.
    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }
    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }
    lmp = new AdaptiveLogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setInterval(getIntegerArgument(cmdLine, interval));
    lmp.setAverageWindow(getIntegerArgument(cmdLine, window));
    lmp.setThreads(getIntegerArgument(cmdLine, threads));
    lmp.setAuc(getStringArgument(cmdLine, auc));
    lmp.setPrior(getStringArgument(cmdLine, prior));
    // --prioroption has no default, so it may legitimately be absent.
    if (cmdLine.getValue(priorOption) != null) {
        lmp.setPriorOption(getDoubleArgument(cmdLine, priorOption));
    }
    lmp.setTypeMap(predictorList, typeList);
    TrainAdaptiveLogistic.showperf = getBooleanArgument(cmdLine, showperf);
    TrainAdaptiveLogistic.skipperfnum = getIntegerArgument(cmdLine, skipperfnum);
    TrainAdaptiveLogistic.passes = getIntegerArgument(cmdLine, passes);
    lmp.checkParameters();
    return true;
}
From source file:org.apache.mahout.classifier.sgd.TrainLogistic.java
private static boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create(); Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training") .create();//from w w w . ja va2 s.c o m ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFile = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get training data").create(); Option outputFile = builder.withLongName("output").withRequired(true) .withArgument(argumentBuilder.withName("output").withMaximum(1).create()) .withDescription("where to get training data").create(); Option predictors = builder.withLongName("predictors").withRequired(true) .withArgument(argumentBuilder.withName("p").create()) .withDescription("a list of predictor variables").create(); Option types = builder.withLongName("types").withRequired(true) .withArgument(argumentBuilder.withName("t").create()) .withDescription("a list of predictor variable types (numeric, word, or text)").create(); Option target = builder.withLongName("target").withRequired(true) .withArgument(argumentBuilder.withName("target").withMaximum(1).create()) .withDescription("the name of the target variable").create(); Option features = builder.withLongName("features") .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create()) .withDescription("the number of internal hashed features to use").create(); Option passes = builder.withLongName("passes") .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create()) .withDescription("the number of times to pass over the input data").create(); Option lambda = builder.withLongName("lambda") 
.withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create()) .withDescription("the amount of coefficient decay to use").create(); Option rate = builder.withLongName("rate") .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create()) .withDescription("the learning rate").create(); Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create(); Option targetCategories = builder.withLongName("categories").withRequired(true) .withArgument(argumentBuilder.withName("number").withMaximum(1).create()) .withDescription("the number of target categories to be considered").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile) .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors) .withOption(types).withOption(passes).withOption(lambda).withOption(rate).withOption(noBias) .withOption(features).create(); Parser parser = new Parser(); parser.setHelpOption(help); parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile); TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile); List<String> typeList = Lists.newArrayList(); for (Object x : cmdLine.getValues(types)) { typeList.add(x.toString()); } List<String> predictorList = Lists.newArrayList(); for (Object x : cmdLine.getValues(predictors)) { predictorList.add(x.toString()); } lmp = new LogisticModelParameters(); lmp.setTargetVariable(getStringArgument(cmdLine, target)); lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories)); lmp.setNumFeatures(getIntegerArgument(cmdLine, features)); lmp.setUseBias(!getBooleanArgument(cmdLine, noBias)); lmp.setTypeMap(predictorList, typeList); 
lmp.setLambda(getDoubleArgument(cmdLine, lambda)); lmp.setLearningRate(getDoubleArgument(cmdLine, rate)); TrainLogistic.scores = getBooleanArgument(cmdLine, scores); TrainLogistic.passes = getIntegerArgument(cmdLine, passes); return true; }
From source file:org.apache.mahout.classifier.sgd.ValidateAdaptiveLogistic.java
private static boolean parseArgs(String[] args) { DefaultOptionBuilder builder = new DefaultOptionBuilder(); Option help = builder.withLongName("help").withDescription("print this list").create(); Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create(); Option auc = builder.withLongName("auc").withDescription("print AUC").create(); Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create(); Option scores = builder.withLongName("scores").withDescription("print scores").create(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFileOption = builder.withLongName("input").withRequired(true) .withArgument(argumentBuilder.withName("input").withMaximum(1).create()) .withDescription("where to get validate data").create(); Option modelFileOption = builder.withLongName("model").withRequired(true) .withArgument(argumentBuilder.withName("model").withMaximum(1).create()) .withDescription("where to get the trained model").create(); Option defaultCagetoryOption = builder.withLongName("defaultCategory").withRequired(false) .withArgument(//from w w w .ja va 2s . 
c o m argumentBuilder.withName("defaultCategory").withMaximum(1).withDefault("unknown").create()) .withDescription("the default category value to use").create(); Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores) .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption) .withOption(defaultCagetoryOption).create(); Parser parser = new Parser(); parser.setHelpOption(help); parser.setHelpTrigger("--help"); parser.setGroup(normalArgs); parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130)); CommandLine cmdLine = parser.parseAndHelp(args); if (cmdLine == null) { return false; } inputFile = getStringArgument(cmdLine, inputFileOption); modelFile = getStringArgument(cmdLine, modelFileOption); defaultCategory = getStringArgument(cmdLine, defaultCagetoryOption); showAuc = getBooleanArgument(cmdLine, auc); showScores = getBooleanArgument(cmdLine, scores); showConfusion = getBooleanArgument(cmdLine, confusion); return true; }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080//from w ww . ja v a2 s .com log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option 
numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { 
log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelClassifierJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(), para.getModelFileName()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // args = new String [] {"-if","infile","-of","outfile","m", // "-nm","10","--nr","11"}; log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true) .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create()) .withDescription("Training data set file").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false) .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create()) .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create(); Option iterOpt = obuilder.withLongName("iter").withRequired(false) .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create()) .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create(); Option kOpt = obuilder.withLongName("k").withRequired(false) .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create()) .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create(); Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false) .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create()) .withDescription(/*from ww w .ja va2 s . 
c om*/ "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ") .withShortName("tsn").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option startingClassIndexOpt = obuilder.withLongName("startingClassIndex").withRequired(false) .withArgument(abuilder.withName("startingClassIndex").withMinimum(1).withMaximum(1).create()) .withDescription("The starting index of class (default = 0) or 1").withShortName("sci").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false) .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create()) .withDescription("0 -> Binary Classfication, 1 -> Regression, " + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ") .withShortName("s").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); // hadoop system setting. 
Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(outputFileOpt) .withOption(lambdaOpt).withOption(iterOpt).withOption(kOpt).withOption(svmTypeOpt) .withOption(classNumOpt).withOption(hdfsServerOpt).withOption(modelFileOpt) .withOption(startingClassIndexOpt).withOption(sampleNumOpt).withOption(mapSplitSizeOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTrainFile(cmdLine.getValue(trainFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // lambda if (cmdLine.hasOption(lambdaOpt)) { 
para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString())); } // iteration if (cmdLine.hasOption(iterOpt)) { para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString())); } // k if (cmdLine.hasOption(kOpt)) { para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString())); } // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // number of samples in training data set. if (cmdLine.hasOption(sampleNumOpt)) { para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString())); } if (cmdLine.hasOption(startingClassIndexOpt)) { para.setStartingClassIndex(Integer.parseInt(cmdLine.getValue(startingClassIndexOpt).toString())); } // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } // multi classification classificationType if (cmdLine.hasOption(svmTypeOpt)) { para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString())); } // MapReduce system setting. 
if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassifierTrainJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassifierTrainJob.setMapperParameters(job.getConfiguration(), para.getMaxIter(), para.getTrainSampleNumber(), para.getClassNum(), para.getClassificationType(), para.getStartingClassIndex()); ParallelMultiClassifierTrainJob.setReducerParameters(job.getConfiguration(), (float) para.getLambda(), para.getExamplesPerIter(), para.getModelFileName(), para.getHdfsServerAddr()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTrainFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassPredictionDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080// ww w .j a v a2 s . co m log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option modelFileOpt = obuilder.withLongName("modelFilePath").withRequired(true) .withArgument(abuilder.withName("modelFilePath").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model files Path (default = /user) ").withShortName("m").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); // system setup Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split 
size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(classNumOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(hdfsServerOpt).withOption(taskTimeoutOpt) .withOption(numberofReducersOpt).withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if 
(cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassPredictionJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassPredictionJob.setMapperParameters(job.getConfiguration(), para.getModelFileName(), para.getHdfsServerAddr(), para.getClassNum(), para.getClassificationType()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }