Example usage for org.apache.commons.cli2.commandline Parser setGroup

List of usage examples for org.apache.commons.cli2.commandline Parser setGroup

Introduction

On this page you can find example usages of org.apache.commons.cli2.commandline Parser setGroup.

Prototype

public void setGroup(final Group group) 

Source Link

Document

Sets the Group of options to parse against

Usage

From source file:org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator.java

/**
 * Command-line entry point: reads a serialized HMM and a file of observed
 * state ids, runs Viterbi decoding to recover the most likely hidden-state
 * sequence, writes that sequence to the output file, and optionally prints
 * the model likelihood of the observations.
 *
 * @param args command-line arguments: input path, output path, model path,
 *             and an optional likelihood flag
 * @throws IOException if the model, input, or output file cannot be
 *                     read or written
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = DefaultOptionCreator.inputOption().create();

    Option outputOption = DefaultOptionCreator.outputOption().create();

    // Required path to the serialized HMM model.
    Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model")
            .withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    // Optional flag: also report the likelihood of the observed sequence.
    Option likelihoodOption = optionBuilder.withLongName("likelihood")
            .withDescription("Compute likelihood of observed sequence").withShortName("l").withRequired(false)
            .create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(modelOption).withOption(likelihoodOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);

        String modelPath = (String) commandLine.getValue(modelOption);

        boolean computeLikelihood = commandLine.hasOption(likelihoodOption);

        //reading serialized HMM
        DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
        HmmModel model;
        try {
            model = LossyHmmSerializer.deserialize(modelStream);
        } finally {
            Closeables.close(modelStream, true);
        }

        //reading observations: whitespace-separated integers
        List<Integer> observations = Lists.newArrayList();
        Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8");
        try {
            while (scanner.hasNextInt()) {
                observations.add(scanner.nextInt());
            }
        } finally {
            scanner.close();
        }

        // Unbox into a primitive array as required by HmmEvaluator.
        int[] observationsArray = new int[observations.size()];
        for (int i = 0; i < observations.size(); ++i) {
            observationsArray[i] = observations.get(i);
        }

        //decoding
        int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true);

        //writing output as space-separated hidden-state ids
        PrintWriter writer = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
        try {
            for (int hiddenState : hiddenStates) {
                writer.print(hiddenState);
                writer.print(' ');
            }
        } finally {
            Closeables.close(writer, false);
        }

        if (computeLikelihood) {
            System.out.println("Likelihood: " + HmmEvaluator.modelLikelihood(model, observationsArray, true));
        }
    } catch (OptionException e) {
        // Invalid command line: print usage and exit without decoding.
        CommandLineUtil.printHelp(optionGroup);
    }
}

From source file:org.apache.mahout.classifier.sgd.RunAdaptiveLogistic.java

/**
 * Parses the command-line arguments and stores the resulting settings in
 * this class's static fields.
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded and execution should continue;
 *         false if help was requested or the arguments were invalid
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argBuilder = new ArgumentBuilder();

    // Flag options (no arguments).
    Option help = optionBuilder.withLongName("help")
            .withDescription("print this list").create();
    Option quiet = optionBuilder.withLongName("quiet")
            .withDescription("be extra quiet").create();
    Option maxScoreOnlyOption = optionBuilder.withLongName("maxscoreonly")
            .withDescription("only output the target label with max scores").create();

    // Required single-argument options.
    Option inputFileOption = optionBuilder.withLongName("input")
            .withRequired(true)
            .withArgument(argBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();
    Option modelFileOption = optionBuilder.withLongName("model")
            .withRequired(true)
            .withArgument(argBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get the trained model")
            .create();
    Option outputFileOption = optionBuilder.withLongName("output")
            .withRequired(true)
            .withDescription("the file path to output scores")
            .withArgument(argBuilder.withName("output").withMaximum(1).create())
            .create();
    Option idColumnOption = optionBuilder.withLongName("idcolumn")
            .withRequired(true)
            .withDescription("the name of the id column for each record")
            .withArgument(argBuilder.withName("idcolumn").withMaximum(1).create())
            .create();

    Group normalArgs = new GroupBuilder()
            .withOption(help)
            .withOption(quiet)
            .withOption(inputFileOption)
            .withOption(modelFileOption)
            .withOption(outputFileOption)
            .withOption(idColumnOption)
            .withOption(maxScoreOnlyOption)
            .create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    outputFile = getStringArgument(cmdLine, outputFileOption);
    idColumn = getStringArgument(cmdLine, idColumnOption);
    maxScoreOnly = getBooleanArgument(cmdLine, maxScoreOnlyOption);
    return true;
}

From source file:org.apache.mahout.classifier.sgd.RunLogistic.java

/**
 * Parses command-line arguments into this class's static configuration
 * fields.
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded and execution should continue;
 *         false if help was shown or parsing failed
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argBuilder = new ArgumentBuilder();

    // Flag options (no arguments).
    Option help = optionBuilder.withLongName("help").withDescription("print this list").create();
    Option quiet = optionBuilder.withLongName("quiet").withDescription("be extra quiet").create();
    Option auc = optionBuilder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = optionBuilder.withLongName("confusion").withDescription("print confusion matrix").create();
    Option scores = optionBuilder.withLongName("scores").withDescription("print scores").create();

    // Required single-argument options.
    Option inputFileOption = optionBuilder.withLongName("input")
            .withRequired(true)
            .withArgument(argBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();
    Option modelFileOption = optionBuilder.withLongName("model")
            .withRequired(true)
            .withArgument(argBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model")
            .create();

    Group normalArgs = new GroupBuilder()
            .withOption(help)
            .withOption(quiet)
            .withOption(auc)
            .withOption(scores)
            .withOption(confusion)
            .withOption(inputFileOption)
            .withOption(modelFileOption)
            .create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);

    return true;
}

From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java

/**
 * Parses the command-line arguments, filling in the input and model file
 * fields.
 *
 * @param args raw command-line arguments
 * @return true when parsing succeeded; false when help was printed or the
 *         arguments were invalid
 */
boolean parseArgs(String[] args) {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argBuilder = new ArgumentBuilder();

    Option help = optionBuilder.withLongName("help").withDescription("print this list").create();

    // Required single-argument options.
    Option inputFileOption = optionBuilder.withLongName("input")
            .withRequired(true)
            .withArgument(argBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();
    Option modelFileOption = optionBuilder.withLongName("model")
            .withRequired(true)
            .withArgument(argBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model")
            .create();

    Group normalArgs = new GroupBuilder()
            .withOption(help)
            .withOption(inputFileOption)
            .withOption(modelFileOption)
            .create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

    CommandLine cmdLine = parser.parseAndHelp(args);
    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    inputFile = (String) cmdLine.getValue(inputFileOption);
    modelFile = (String) cmdLine.getValue(modelFileOption);
    return true;
}

From source file:org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic.java

/**
 * Parses the command-line arguments, populating the static model
 * parameters ({@code lmp}) and the training configuration fields of this
 * class.
 *
 * Fix: the {@code --window} option's help text read "average propery";
 * corrected to "average property".
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded and training should proceed;
 *         false if help was requested or the arguments were invalid
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option showperf = builder.withLongName("showperf")
            .withDescription("output performance measures during training").create();

    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to write the model content").create();

    Option threads = builder.withLongName("threads")
            .withArgument(argumentBuilder.withName("threads").withDefault("4").create())
            .withDescription("the number of threads AdaptiveLogisticRegression uses").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("predictors").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("types").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withDescription("the name of the target variable")
            .withRequired(true).withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .create();

    Option targetCategories = builder.withLongName("categories")
            .withDescription("the number of target categories to be considered").withRequired(true)
            .withArgument(argumentBuilder.withName("categories").withMaximum(1).create()).create();

    Option features = builder.withLongName("features")
            .withDescription("the number of internal hashed features to use")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .create();

    Option passes = builder.withLongName("passes")
            .withDescription("the number of times to pass over the input data")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create()).create();

    Option interval = builder.withLongName("interval")
            .withArgument(argumentBuilder.withName("interval").withDefault("500").create())
            .withDescription("the interval property of AdaptiveLogisticRegression").create();

    Option window = builder.withLongName("window")
            .withArgument(argumentBuilder.withName("window").withDefault("800").create())
            .withDescription("the average property of AdaptiveLogisticRegression").create();

    Option skipperfnum = builder.withLongName("skipperfnum")
            .withArgument(argumentBuilder.withName("skipperfnum").withDefault("99").create())
            .withDescription("show performance measures every (skipperfnum + 1) rows").create();

    Option prior = builder.withLongName("prior")
            .withArgument(argumentBuilder.withName("prior").withDefault("L1").create())
            .withDescription("the prior algorithm to use: L1, L2, ebp, tp, up").create();

    Option priorOption = builder.withLongName("prioroption")
            .withArgument(argumentBuilder.withName("prioroption").create())
            .withDescription("constructor parameter for ElasticBandPrior and TPrior").create();

    Option auc = builder.withLongName("auc")
            .withArgument(argumentBuilder.withName("auc").withDefault("global").create())
            .withDescription("the auc to use: global or grouped").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
            .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
            .withOption(types).withOption(passes).withOption(interval).withOption(window).withOption(threads)
            .withOption(prior).withOption(features).withOption(showperf).withOption(skipperfnum)
            .withOption(priorOption).withOption(auc).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    TrainAdaptiveLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainAdaptiveLogistic.outputFile = getStringArgument(cmdLine, outputFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }

    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    // Transfer parsed values into the model parameter object.
    lmp = new AdaptiveLogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setInterval(getIntegerArgument(cmdLine, interval));
    lmp.setAverageWindow(getIntegerArgument(cmdLine, window));
    lmp.setThreads(getIntegerArgument(cmdLine, threads));
    lmp.setAuc(getStringArgument(cmdLine, auc));
    lmp.setPrior(getStringArgument(cmdLine, prior));
    if (cmdLine.getValue(priorOption) != null) {
        // prioroption has no default, so only set it when supplied.
        lmp.setPriorOption(getDoubleArgument(cmdLine, priorOption));
    }
    lmp.setTypeMap(predictorList, typeList);
    TrainAdaptiveLogistic.showperf = getBooleanArgument(cmdLine, showperf);
    TrainAdaptiveLogistic.skipperfnum = getIntegerArgument(cmdLine, skipperfnum);
    TrainAdaptiveLogistic.passes = getIntegerArgument(cmdLine, passes);

    // Validate the assembled parameter set before returning.
    lmp.checkParameters();

    return true;
}

From source file:org.apache.mahout.classifier.sgd.TrainLogistic.java

/**
 * Parses the command-line arguments, populating the static model
 * parameters ({@code lmp}) and the training configuration fields of this
 * class.
 *
 * Fix: the {@code --output} option's help text was a copy-paste of the
 * input option's ("where to get training data"); corrected to describe
 * writing the model.
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded and training should proceed;
 *         false if help was requested or the arguments were invalid
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training")
            .create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to write the model content").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable").create();

    Option features = builder.withLongName("features")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .withDescription("the number of internal hashed features to use").create();

    Option passes = builder.withLongName("passes")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
            .withDescription("the number of times to pass over the input data").create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use").create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate").create();

    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();

    Option targetCategories = builder.withLongName("categories").withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
            .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
            .withOption(types).withOption(passes).withOption(lambda).withOption(rate).withOption(noBias)
            .withOption(features).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }

    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    // Transfer parsed values into the model parameter object.
    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);

    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    return true;
}

From source file:org.apache.mahout.classifier.sgd.ValidateAdaptiveLogistic.java

/**
 * Parses the command-line arguments into this class's static
 * configuration fields.
 *
 * Fix: renamed the misspelled local {@code defaultCagetoryOption} to
 * {@code defaultCategoryOption} (local-only rename; no behavior change).
 *
 * @param args raw command-line arguments
 * @return true if parsing succeeded and validation should proceed;
 *         false if help was requested or the arguments were invalid
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();

    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get validate data").create();

    Option modelFileOption = builder.withLongName("model").withRequired(true)
            .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get the trained model").create();

    // Optional; falls back to "unknown" when not supplied.
    Option defaultCategoryOption = builder.withLongName("defaultCategory").withRequired(false)
            .withArgument(
                    argumentBuilder.withName("defaultCategory").withMaximum(1).withDefault("unknown").create())
            .withDescription("the default category value to use").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores)
            .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption)
            .withOption(defaultCategoryOption).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        // Help was printed or parsing failed.
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    defaultCategory = getStringArgument(cmdLine, defaultCategoryOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);

    return true;
}

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java

/**
 * Command-line entry point: parses classifier options into an
 * {@link SVMParameters} object, configures a Hadoop job for parallel
 * classification, and submits it.
 *
 * Fix: on an {@link OptionException} the original code printed help but
 * then fell through and submitted the job with incomplete parameters;
 * it now returns immediately after printing help.
 *
 * @param args command-line arguments (see the option descriptions below)
 * @throws IOException            if job setup or submission fails on I/O
 * @throws InterruptedException   if job submission is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws OptionException        declared for API compatibility
 */
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {

    // example args:
    // -if /user/maximzhao/dataset/rcv1_test.binary -of
    // /user/maximzhao/rcv.result
    // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt
    // 1080
    log.info("[job] " + JOB_NAME);
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option testFileOpt = obuilder.withLongName("testFile").withRequired(true)
            .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true)
            .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt)
            .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt)
            .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt)
            .withOption(helpOpt).create();
    SVMParameters para = new SVMParameters();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTestFile(cmdLine.getValue(testFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }

        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }

        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }

        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        // Do not proceed to job submission with incomplete parameters.
        return;
    }
    // set parameters for the mapper, combiner, reducer

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelClassifierJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(),
            para.getModelFileName());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainDriver.java

/**
 * Command-line entry point for the parallel multi-class SVM training job.
 *
 * <p>Parses Commons CLI2 options describing the training data, model output
 * location, SVM hyper-parameters (lambda, iterations, block size k), class
 * count, and Hadoop tuning knobs (split size, heap, reducers, timeout), then
 * configures and submits a MapReduce job via {@code MapReduceUtil.submitJob}.
 *
 * @param args command-line arguments; run with {@code --help} for the option list
 * @throws IOException            if job setup or submission fails
 * @throws InterruptedException   if job submission is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws OptionException        declared for CLI2 compatibility; parse errors
 *                                are handled locally by printing help
 */
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {
    // Example invocation:
    // args = new String [] {"-if","infile","-of","outfile","m",
    // "-nm","10","--nr","11"};
    log.info("[job] " + JOB_NAME);

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true)
            .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Training data set file").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false)
            .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create())
            .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create();

    Option iterOpt = obuilder.withLongName("iter").withRequired(false)
            .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create();

    Option kOpt = obuilder.withLongName("k").withRequired(false)
            .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create();

    Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false)
            .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ")
            .withShortName("tsn").create();

    Option classNumOpt = obuilder.withLongName("classNum").withRequired(true)
            .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c")
            .create();

    Option startingClassIndexOpt = obuilder.withLongName("startingClassIndex").withRequired(false)
            .withArgument(abuilder.withName("startingClassIndex").withMinimum(1).withMaximum(1).create())
            .withDescription("The starting index of class (default = 0) or 1").withShortName("sci").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false)
            .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create())
            .withDescription("0 -> Binary Classfication, 1 -> Regression, "
                    + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ")
            .withShortName("s").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    // Hadoop system settings.
    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(outputFileOpt)
            .withOption(lambdaOpt).withOption(iterOpt).withOption(kOpt).withOption(svmTypeOpt)
            .withOption(classNumOpt).withOption(hdfsServerOpt).withOption(modelFileOpt)
            .withOption(startingClassIndexOpt).withOption(sampleNumOpt).withOption(mapSplitSizeOpt)
            .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt)
            .withOption(helpOpt).create();

    SVMParameters para = new SVMParameters();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTrainFile(cmdLine.getValue(trainFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());

        // lambda (regularization)
        if (cmdLine.hasOption(lambdaOpt)) {
            para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString()));
        }
        // iteration count
        if (cmdLine.hasOption(iterOpt)) {
            para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString()));
        }
        // k: examples per stochastic-gradient iteration
        if (cmdLine.hasOption(kOpt)) {
            para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        // class number (required)
        para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString()));
        // number of samples in training data set
        if (cmdLine.hasOption(sampleNumOpt)) {
            para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString()));
        }

        if (cmdLine.hasOption(startingClassIndexOpt)) {
            para.setStartingClassIndex(Integer.parseInt(cmdLine.getValue(startingClassIndexOpt).toString()));
        }
        // models' path (required)
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());
        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }
        // multi classification classificationType
        if (cmdLine.hasOption(svmTypeOpt)) {
            para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString()));
        }
        // MapReduce system settings
        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }
        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }
        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        // Parsing failed, so required parameters (train file, model file, ...)
        // were never set on 'para'. Bail out instead of falling through and
        // submitting a misconfigured job.
        return;
    }

    // set parameters for the mapper, combiner, reducer

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelMultiClassifierTrainJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelMultiClassifierTrainJob.setMapperParameters(job.getConfiguration(), para.getMaxIter(),
            para.getTrainSampleNumber(), para.getClassNum(), para.getClassificationType(),
            para.getStartingClassIndex());

    ParallelMultiClassifierTrainJob.setReducerParameters(job.getConfiguration(), (float) para.getLambda(),
            para.getExamplesPerIter(), para.getModelFileName(), para.getHdfsServerAddr());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTrainFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassPredictionDriver.java

/**
 * Command-line entry point for the parallel multi-class SVM prediction job.
 *
 * <p>Parses Commons CLI2 options naming the test data, output path, trained
 * model directory and class count (plus Hadoop tuning knobs), then configures
 * and submits a MapReduce prediction job via {@code MapReduceUtil.submitJob}.
 *
 * @param args command-line arguments; run with {@code --help} for the option list
 * @throws IOException            if job setup or submission fails
 * @throws InterruptedException   if job submission is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws OptionException        declared for CLI2 compatibility; parse errors
 *                                are handled locally by printing help
 */
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {

    // example args:
    // -if /user/maximzhao/dataset/rcv1_test.binary -of
    // /user/maximzhao/rcv.result
    // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt
    // 1080
    log.info("[job] " + JOB_NAME);
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option testFileOpt = obuilder.withLongName("testFile").withRequired(true)
            .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option modelFileOpt = obuilder.withLongName("modelFilePath").withRequired(true)
            .withArgument(abuilder.withName("modelFilePath").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model files Path (default = /user) ").withShortName("m").create();

    Option classNumOpt = obuilder.withLongName("classNum").withRequired(true)
            .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c")
            .create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    // Hadoop system settings.
    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt)
            .withOption(mapSplitSizeOpt).withOption(classNumOpt).withOption(outputFileOpt)
            .withOption(maxHeapSizeOpt).withOption(hdfsServerOpt).withOption(taskTimeoutOpt)
            .withOption(numberofReducersOpt).withOption(helpOpt).create();

    SVMParameters para = new SVMParameters();

    try {

        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTestFile(cmdLine.getValue(testFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());
        // models' path (required)
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());
        // class number (required)
        para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString()));

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }

        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }

        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }

        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        // Parsing failed, so required parameters (test file, model path, class
        // count) were never set on 'para'. Bail out instead of falling through
        // and submitting a misconfigured job.
        return;
    }

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelMultiClassPredictionJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelMultiClassPredictionJob.setMapperParameters(job.getConfiguration(), para.getModelFileName(),
            para.getHdfsServerAddr(), para.getClassNum(), para.getClassificationType());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}