Example usage for org.apache.commons.cli2.commandline Parser setGroup

List of usage examples for org.apache.commons.cli2.commandline Parser setGroup

Introduction

On this page you can find example usages of the setGroup method of org.apache.commons.cli2.commandline.Parser.

Prototype

public void setGroup(final Group group) 

Source Link

Document

Sets the Group of options to parse against.

Usage

From source file:org.apache.mahout.classifier.df.BreimanExample.java

/**
 * Parses the command line, loads the dataset and the data, then calls
 * {@code runIteration} once per requested iteration and logs the averaged
 * results.
 * <p>
 * Options (long name / short name): data/d, dataset/ds, nbtrees/t and
 * iterations/i are required and take exactly one value; help/h takes none.
 *
 * @param args command line arguments
 * @return {@code -1} when help was requested or the options could not be
 *         parsed, {@code 0} otherwise
 * @throws IOException presumably raised while loading the dataset or the data
 */
@Override
public int run(String[] args) throws IOException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
            .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of trees to grow, each iteration").create();

    Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
            .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of times to repeat the test").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
            .withOption(nbtreesOpt).withOption(helpOpt).create();

    Path dataPath;
    Path datasetPath;
    int nbTrees;
    int nbIterations;

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        // Query the Option object itself rather than the bare string "help":
        // a String lookup is matched against the option's triggers, which
        // presumably carry their prefixes ("--help" / "-h").
        // NOTE(review): confirm against the commons-cli2 version in use.
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
        nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1);

    Random rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < nbIterations; iteration++) {
        log.info("Iteration {}", iteration);
        runIteration(rng, data, m, nbTrees);
    }

    // The sum*/numNodes* accumulators are defined outside this method and are
    // presumably updated by runIteration; each is averaged over the iterations.
    log.info("********************************************");
    log.info("Random Input Test Error : {}", sumTestErrM / nbIterations);
    log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations);
    log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
    log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
    log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
    log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

    return 0;
}

From source file:org.apache.mahout.classifier.df.mapreduce.Resampling.java

/**
 * Parses the command line, stores the parsed settings in variables defined
 * outside this method, and runs the selected resampling technique.
 * <p>
 * The {@code resampling} option selects the technique, case-insensitively:
 * {@code overs}, {@code unders} or {@code smote}. Any other value is reported
 * as an error instead of being ignored.
 *
 * @param args command line arguments
 * @return {@code -1} when help was requested, the options could not be parsed
 *         or the resampling technique is unknown; {@code 0} otherwise
 * @throws Exception presumably propagated from the resampling jobs
 */
public int run(String[] args) throws Exception, ClassNotFoundException, InterruptedException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option dataPreprocessingOpt = obuilder.withLongName("dataPreprocessing").withShortName("dp")
            .withRequired(true).withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data Preprocessing path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Time path").create();

    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help")
            .create();

    Option resamplingOpt = obuilder.withLongName("resampling").withShortName("rs").withRequired(true)
            .withArgument(abuilder.withName("resampling").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The resampling technique (oversampling (overs), undersampling (unders) or SMOTE (smote))")
            .create();

    Option nbpartitionsOpt = obuilder.withLongName("nbpartitions").withShortName("p").withRequired(true)
            .withArgument(abuilder.withName("nbpartitions").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of partitions").create();

    Option nposOpt = obuilder.withLongName("npos").withShortName("npos").withRequired(true)
            .withArgument(abuilder.withName("npos").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of instances of the positive class").create();

    Option nnegOpt = obuilder.withLongName("nneg").withShortName("nneg").withRequired(true)
            .withArgument(abuilder.withName("nneg").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of instances of the negative class").create();

    Option negclassOpt = obuilder.withLongName("negclass").withShortName("negclass").withRequired(true)
            .withArgument(abuilder.withName("negclass").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the negative class").create();

    Option posclassOpt = obuilder.withLongName("posclass").withShortName("posclass").withRequired(true)
            .withArgument(abuilder.withName("posclass").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the positive class").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
            .withOption(helpOpt).withOption(resamplingOpt).withOption(dataPreprocessingOpt)
            .withOption(nbpartitionsOpt).withOption(nposOpt).withOption(nnegOpt).withOption(negclassOpt)
            .withOption(posclassOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        // Query the Option object itself rather than the bare string "help":
        // a String lookup is matched against the option's triggers, which
        // presumably carry their prefixes ("--help" / "-h").
        // NOTE(review): confirm against the commons-cli2 version in use.
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        dataPreprocessing = cmdLine.getValue(dataPreprocessingOpt).toString();
        String resampling = cmdLine.getValue(resamplingOpt).toString();
        partitions = Integer.parseInt(cmdLine.getValue(nbpartitionsOpt).toString());
        npos = Integer.parseInt(cmdLine.getValue(nposOpt).toString());
        nneg = Integer.parseInt(cmdLine.getValue(nnegOpt).toString());
        negclass = cmdLine.getValue(negclassOpt).toString();
        posclass = cmdLine.getValue(posclassOpt).toString();

        if (resampling.equalsIgnoreCase("overs")) {
            withOversampling = true;
        } else if (resampling.equalsIgnoreCase("unders")) {
            withUndersampling = true;
        } else if (resampling.equalsIgnoreCase("smote")) {
            withSmote = true;
        } else {
            // An unrecognised technique used to fall through and report
            // success without doing any work; fail loudly instead.
            log.error("Unknown resampling technique: {}", resampling);
            CommandLineUtil.printHelp(group);
            return -1;
        }

        if (cmdLine.hasOption(timeOpt)) {
            preprocessingTimeIsStored = true;
            timeName = cmdLine.getValue(timeOpt).toString();
        }

        if (log.isDebugEnabled()) {
            log.debug("data : {}", dataName);
            log.debug("dataset : {}", datasetName);
            log.debug("time : {}", timeName);
            log.debug("Oversampling : {}", withOversampling);
            log.debug("Undersampling : {}", withUndersampling);
            log.debug("SMOTE : {}", withSmote);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        dataPreprocessingPath = new Path(dataPreprocessing);
        if (preprocessingTimeIsStored) {
            timePath = new Path(timeName);
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    if (withOversampling) {
        overSampling();
    } else if (withUndersampling) {
        underSampling();
    } else if (withSmote) {
        smote();
    }

    return 0;
}

From source file:org.apache.mahout.classifier.df.tools.ForestVisualizer.java

/**
 * Command line entry point: parses the dataset path, the model path and the
 * optional attribute names, then hands them to {@code print}.
 * <p>
 * Any exception raised while parsing or printing is logged and followed by
 * the help text.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Path to the Decision Forest").create();

    Option attrNamesOpt = obuilder.withLongName("names").withShortName("n").withRequired(false)
            .withArgument(abuilder.withName("names").withMinimum(1).create())
            .withDescription("Optional, Attribute names").create();

    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help")
            .create();

    Group group = gbuilder.withName("Options").withOption(datasetOpt).withOption(modelOpt)
            .withOption(attrNamesOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        // Query the Option object itself rather than the bare string "help":
        // a String lookup is matched against the option's triggers, which
        // presumably carry their prefixes ("--help" / "-h").
        // NOTE(review): confirm against the commons-cli2 version in use.
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String datasetName = cmdLine.getValue(datasetOpt).toString();
        String modelName = cmdLine.getValue(modelOpt).toString();

        // attrNames stays null when no names were supplied.
        String[] attrNames = null;
        if (cmdLine.hasOption(attrNamesOpt)) {
            // The values are cast to a Collection<String> exactly as before;
            // the suppression is confined to this one declaration.
            @SuppressWarnings("unchecked")
            Collection<String> names = (Collection<String>) cmdLine.getValues(attrNamesOpt);
            if (!names.isEmpty()) {
                attrNames = names.toArray(new String[names.size()]);
            }
        }

        print(modelName, datasetName, attrNames);
    } catch (Exception e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.classifier.df.tools.Frequencies.java

/**
 * Parses the {@code data} and {@code dataset} options and passes both values
 * to {@code runTool}. When help is requested, or when the options cannot be
 * parsed, only the help text is printed.
 *
 * @param args command line arguments
 * @return always {@code 0}
 */
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();

    Option dataOption = optionBuilder
            .withLongName("data")
            .withShortName("d")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path")
            .create();

    Option datasetOption = optionBuilder
            .withLongName("dataset")
            .withShortName("ds")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("path").withMinimum(1).create())
            .withDescription("dataset path")
            .create();

    Option helpOption = optionBuilder
            .withLongName("help")
            .withDescription("Print out help")
            .withShortName("h")
            .create();

    Group options = groupBuilder
            .withName("Options")
            .withOption(dataOption)
            .withOption(datasetOption)
            .withOption(helpOption)
            .create();

    try {
        Parser cliParser = new Parser();
        cliParser.setGroup(options);
        CommandLine parsed = cliParser.parse(args);

        if (!parsed.hasOption(helpOption)) {
            String dataPath = parsed.getValue(dataOption).toString();
            String datasetPath = parsed.getValue(datasetOption).toString();

            log.debug("Data path : {}", dataPath);
            log.debug("Dataset path : {}", datasetPath);

            runTool(dataPath, datasetPath);
        } else {
            // Help requested: show usage and fall through to the common return.
            CommandLineUtil.printHelp(options);
        }
    } catch (OptionException parseFailure) {
        log.warn(parseFailure.toString(), parseFailure);
        CommandLineUtil.printHelp(options);
    }

    return 0;
}

From source file:org.apache.mahout.classifier.df.tools.UDistrib.java

/**
 * Launch the uniform distribution tool. Requires the following command line arguments:<br>
 * /*from  w w w.j  a  v  a  2  s. c o m*/
 * data : data path dataset : dataset path numpartitions : num partitions output : output path
 *
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("data").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).create()).withDescription("Dataset path")
            .create();

    Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Path to generated files").create();

    Option partitionsOpt = obuilder.withLongName("numpartitions").withShortName("p").withRequired(true)
            .withArgument(abuilder.withName("numparts").withMinimum(1).withMinimum(1).create())
            .withDescription("Number of partitions to create").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(outputOpt).withOption(datasetOpt)
            .withOption(partitionsOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String data = cmdLine.getValue(dataOpt).toString();
        String dataset = cmdLine.getValue(datasetOpt).toString();
        int numPartitions = Integer.parseInt(cmdLine.getValue(partitionsOpt).toString());
        String output = cmdLine.getValue(outputOpt).toString();

        runTool(data, dataset, output, numPartitions);
    } catch (OptionException e) {
        log.warn(e.toString(), e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:org.apache.mahout.classifier.mlp.RunMultilayerPerceptron.java

/**
 * Parse the arguments./*w ww  . j av a 2s  . com*/
 *
 * @param args The input arguments.
 * @param parameters  The parameters need to be filled.
 * @return true or false
 * @throws Exception
 */
private static boolean parseArgs(String[] args, Parameters parameters) throws Exception {
    // build the options
    log.info("Validate and parse arguments...");
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputFileFormatOption = optionBuilder
            .withLongName("format").withShortName("f").withArgument(argumentBuilder.withName("file type")
                    .withDefault("csv").withMinimum(1).withMaximum(1).create())
            .withDescription("type of input file, currently support 'csv'").create();

    List<Integer> columnRangeDefault = Lists.newArrayList();
    columnRangeDefault.add(0);
    columnRangeDefault.add(Integer.MAX_VALUE);

    Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").withRequired(false)
            .withDescription("whether to skip the first row of the input file").create();

    Option inputColumnRangeOption = optionBuilder.withLongName("columnRange").withShortName("cr")
            .withDescription("the column range of the input file, start from 0").withArgument(argumentBuilder
                    .withName("range").withMinimum(2).withMaximum(2).withDefaults(columnRangeDefault).create())
            .create();

    Group inputFileTypeGroup = groupBuilder.withOption(skipHeaderOption).withOption(inputColumnRangeOption)
            .withOption(inputFileFormatOption).create();

    Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true)
            .withArgument(argumentBuilder.withName("file path").withMinimum(1).withMaximum(1).create())
            .withDescription("the file path of unlabelled dataset").withChildren(inputFileTypeGroup).create();

    Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true)
            .withArgument(argumentBuilder.withName("model file").withMinimum(1).withMaximum(1).create())
            .withDescription("the file path of the model").create();

    Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels")
            .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create())
            .withDescription("an ordered list of label names").create();

    Group labelsGroup = groupBuilder.withOption(labelsOption).create();

    Option outputOption = optionBuilder.withLongName("output").withShortName("o").withRequired(true)
            .withArgument(
                    argumentBuilder.withConsumeRemaining("file path").withMinimum(1).withMaximum(1).create())
            .withDescription("the file path of labelled results").withChildren(labelsGroup).create();

    // parse the input
    Parser parser = new Parser();
    Group normalOption = groupBuilder.withOption(inputOption).withOption(modelOption).withOption(outputOption)
            .create();
    parser.setGroup(normalOption);
    CommandLine commandLine = parser.parseAndHelp(args);
    if (commandLine == null) {
        return false;
    }

    // obtain the arguments
    parameters.inputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, inputOption);
    parameters.inputFileFormat = TrainMultilayerPerceptron.getString(commandLine, inputFileFormatOption);
    parameters.skipHeader = commandLine.hasOption(skipHeaderOption);
    parameters.modelFilePathStr = TrainMultilayerPerceptron.getString(commandLine, modelOption);
    parameters.outputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, outputOption);

    List<?> columnRange = commandLine.getValues(inputColumnRangeOption);
    parameters.columnStart = Integer.parseInt(columnRange.get(0).toString());
    parameters.columnEnd = Integer.parseInt(columnRange.get(1).toString());

    return true;
}

From source file:org.apache.mahout.classifier.mlp.TrainMultilayerPerceptron.java

/**
 * Parse the input arguments./*from   w  w  w.  j  a  v  a  2s .c  o  m*/
 * 
 * @param args The input arguments
 * @param parameters The parameters parsed.
 * @return Whether the input arguments are valid.
 * @throws Exception
 */
private static boolean parseArgs(String[] args, Parameters parameters) throws Exception {
    // build the options
    log.info("Validate and parse arguments...");
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    // whether skip the first row of the input file
    Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").create();

    Group skipHeaderGroup = groupBuilder.withOption(skipHeaderOption).create();

    Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true)
            .withChildren(skipHeaderGroup)
            .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("the file path of training dataset").create();

    Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels").withRequired(true)
            .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create())
            .withDescription("label names").create();

    Option updateOption = optionBuilder.withLongName("update").withShortName("u")
            .withDescription("whether to incrementally update model if the model exists").create();

    Group modelUpdateGroup = groupBuilder.withOption(updateOption).create();

    Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true)
            .withArgument(argumentBuilder.withName("model-path").withMinimum(1).withMaximum(1).create())
            .withDescription("the path to store the trained model").withChildren(modelUpdateGroup).create();

    Option layerSizeOption = optionBuilder.withLongName("layerSize").withShortName("ls").withRequired(true)
            .withArgument(argumentBuilder.withName("size of layer").withMinimum(2).withMaximum(5).create())
            .withDescription("the size of each layer").create();

    Option squashingFunctionOption = optionBuilder.withLongName("squashingFunction").withShortName("sf")
            .withArgument(argumentBuilder.withName("squashing function").withMinimum(1).withMaximum(1)
                    .withDefault("Sigmoid").create())
            .withDescription("the name of squashing function (currently only supports Sigmoid)").create();

    Option learningRateOption = optionBuilder.withLongName("learningRate").withShortName("l")
            .withArgument(argumentBuilder.withName("learning rate").withMaximum(1).withMinimum(1)
                    .withDefault(NeuralNetwork.DEFAULT_LEARNING_RATE).create())
            .withDescription("learning rate").create();

    Option momemtumOption = optionBuilder.withLongName("momemtumWeight").withShortName("m")
            .withArgument(argumentBuilder.withName("momemtum weight").withMaximum(1).withMinimum(1)
                    .withDefault(NeuralNetwork.DEFAULT_MOMENTUM_WEIGHT).create())
            .withDescription("momemtum weight").create();

    Option regularizationOption = optionBuilder.withLongName("regularizationWeight").withShortName("r")
            .withArgument(argumentBuilder.withName("regularization weight").withMaximum(1).withMinimum(1)
                    .withDefault(NeuralNetwork.DEFAULT_REGULARIZATION_WEIGHT).create())
            .withDescription("regularization weight").create();

    // parse the input
    Parser parser = new Parser();
    Group normalOptions = groupBuilder.withOption(inputOption).withOption(skipHeaderOption)
            .withOption(updateOption).withOption(labelsOption).withOption(modelOption)
            .withOption(layerSizeOption).withOption(squashingFunctionOption).withOption(learningRateOption)
            .withOption(momemtumOption).withOption(regularizationOption).create();

    parser.setGroup(normalOptions);

    CommandLine commandLine = parser.parseAndHelp(args);
    if (commandLine == null) {
        return false;
    }

    parameters.learningRate = getDouble(commandLine, learningRateOption);
    parameters.momemtumWeight = getDouble(commandLine, momemtumOption);
    parameters.regularizationWeight = getDouble(commandLine, regularizationOption);

    parameters.inputFilePath = getString(commandLine, inputOption);
    parameters.skipHeader = commandLine.hasOption(skipHeaderOption);

    List<String> labelsList = getStringList(commandLine, labelsOption);
    int currentIndex = 0;
    for (String label : labelsList) {
        parameters.labelsIndex.put(label, currentIndex++);
    }

    parameters.modelFilePath = getString(commandLine, modelOption);
    parameters.updateModel = commandLine.hasOption(updateOption);

    parameters.layerSizeList = getIntegerList(commandLine, layerSizeOption);

    parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption);

    System.out.printf(
            "Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f,"
                    + " Momemtum weight: %f, Regularization Weight: %f\n",
            parameters.inputFilePath, parameters.modelFilePath, parameters.updateModel,
            Arrays.toString(parameters.layerSizeList.toArray()), parameters.squashingFunctionName,
            parameters.learningRate, parameters.momemtumWeight, parameters.regularizationWeight);

    return true;
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.BaumWelchTrainer.java

/**
 * Command line entry point: trains an HMM with Baum-Welch on a sequence of
 * integer observations read from the input file, serializes the trained
 * model to the output file and prints it to standard output.
 * <p>
 * When the options cannot be parsed, only the help text is printed.
 *
 * @param args command line arguments
 * @throws IOException if the input file cannot be opened or the output file
 *         cannot be written
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = DefaultOptionCreator.inputOption().create();

    Option outputOption = DefaultOptionCreator.outputOption().create();

    Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates")
            .withDescription("Number of hidden states").withShortName("nh")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates")
            .withDescription("Number of observed states").withShortName("no")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold")
            .withShortName("e")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option iterationsOption = optionBuilder.withLongName("max-iterations")
            .withDescription("Maximum iterations number").withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(stateNumberOption).withOption(observedStateNumberOption).withOption(epsilonOption)
            .withOption(iterationsOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);

        int nrOfHiddenStates = Integer.parseInt((String) commandLine.getValue(stateNumberOption));
        int nrOfObservedStates = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption));

        double epsilon = Double.parseDouble((String) commandLine.getValue(epsilonOption));
        int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption));

        //constructing random-generated HMM, seeded with the current time
        HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime());

        //reading observations
        int[] observationsArray = readObservations(input);

        //training
        HmmModel trainedModel = HmmTrainer.trainBaumWelch(model, observationsArray, epsilon, maxIterations,
                true);

        //serializing trained model
        DataOutputStream stream = new DataOutputStream(new FileOutputStream(output));
        try {
            LossyHmmSerializer.serialize(trainedModel, stream);
        } finally {
            Closeables.close(stream, false);
        }

        //printing trained model
        printModel(trainedModel);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }
}

/**
 * Reads integers from the UTF-8 encoded file at {@code path}, stopping at
 * the first token that is not an integer or at end of input.
 */
private static int[] readObservations(String path) throws IOException {
    List<Integer> observations = Lists.newArrayList();

    Scanner scanner = new Scanner(new FileInputStream(path), "UTF-8");
    try {
        while (scanner.hasNextInt()) {
            observations.add(scanner.nextInt());
        }
    } finally {
        scanner.close();
    }

    int[] observationsArray = new int[observations.size()];
    for (int i = 0; i < observations.size(); ++i) {
        observationsArray[i] = observations.get(i);
    }
    return observationsArray;
}

/** Prints the indices {@code 0..count-1}, each followed by a space, then ends the line. */
private static void printIndexRow(int count) {
    for (int i = 0; i < count; ++i) {
        System.out.print(i + " ");
    }
    System.out.println();
}

/** Prints the initial probabilities, transition matrix and emission matrix of {@code trainedModel}. */
private static void printModel(HmmModel trainedModel) {
    int hiddenStates = trainedModel.getNrOfHiddenStates();
    int outputStates = trainedModel.getNrOfOutputStates();

    System.out.println("Initial probabilities: ");
    printIndexRow(hiddenStates);
    for (int i = 0; i < hiddenStates; ++i) {
        System.out.print(trainedModel.getInitialProbabilities().get(i) + " ");
    }
    System.out.println();

    System.out.println("Transition matrix:");
    System.out.print("  ");
    printIndexRow(hiddenStates);
    for (int i = 0; i < hiddenStates; ++i) {
        System.out.print(i + " ");
        for (int j = 0; j < hiddenStates; ++j) {
            System.out.print(trainedModel.getTransitionMatrix().get(i, j) + " ");
        }
        System.out.println();
    }

    System.out.println("Emission matrix: ");
    System.out.print("  ");
    printIndexRow(outputStates);
    for (int i = 0; i < hiddenStates; ++i) {
        System.out.print(i + " ");
        for (int j = 0; j < outputStates; ++j) {
            System.out.print(trainedModel.getEmissionMatrix().get(i, j) + " ");
        }
        System.out.println();
    }
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchDriver.java

/**
 * Parses the command-line options for the Baum-Welch driver and hands them to the
 * multi-argument {@code run} overload.
 *
 * <p>If the {@code buildRandom} option is present and its value parses as {@code true},
 * {@code BaumWelchUtils.buildRandomModel} is called with the model path before training starts.
 *
 * @param args command-line arguments
 * @return 0 after the training overload has been invoked; -1 if the arguments could not be
 *         parsed (the usage help is printed in that case)
 * @throws IllegalArgumentException if no {@code model} path was supplied
 * @throws Exception whatever the model construction or the training overload throws
 */
@Override
public int run(String[] args) throws Exception {

    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = optionBuilder.withLongName("input")
            .withDescription("Sequence file containing VectorWritables as training sequence").withShortName("i")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option outputOption = optionBuilder.withLongName("output")
            .withDescription("Output path to store the trained model encoded as Sequence Files")
            .withShortName("o")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option modelOption = optionBuilder.withLongName("model")
            .withDescription("Initial HmmModel encoded as a Sequence File. "
                    + "Will be constructed with a random distribution if the 'buildRandom' option is set to true.")
            .withShortName("im")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(false).create();

    Option hiddenStateMapPath = optionBuilder.withLongName("hiddenStateToIDMap")
            .withDescription("Hidden states to ID map path.").withShortName("hmap")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option emitStateMapPath = optionBuilder.withLongName("emittedStateToIDMap")
            .withDescription("Emitted states to ID map path.").withShortName("smap")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option randomOption = optionBuilder.withLongName("buildRandom")
            .withDescription(
                    "Optional argument to generate a random initial HmmModel and store it in 'model' directory")
            .withShortName("r")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("boolean").create())
            .withRequired(false).create();

    // NOTE(review): the description calls this option "Optional" but it is declared required;
    // confirm which one is intended before changing either.
    Option scalingOption = optionBuilder.withLongName("Scaling")
            .withDescription("Optional argument to invoke scaled training").withShortName("l")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("string").create())
            .withRequired(true).create();

    Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates")
            .withDescription("Number of hidden states").withShortName("nh")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates")
            .withDescription("Number of observed states").withShortName("no")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold")
            .withShortName("e")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option iterationsOption = optionBuilder.withLongName("maxIterations")
            .withDescription("Maximum iterations number").withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(modelOption).withOption(hiddenStateMapPath).withOption(emitStateMapPath)
            .withOption(randomOption).withOption(scalingOption).withOption(stateNumberOption)
            .withOption(observedStateNumberOption).withOption(epsilonOption).withOption(iterationsOption)
            .withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);
        String modelIn = (String) commandLine.getValue(modelOption);
        String hiddenStateToIdMap = (String) commandLine.getValue(hiddenStateMapPath);
        String emittedStateToIdMap = (String) commandLine.getValue(emitStateMapPath);

        // The option carries a mandatory boolean argument, so honour its value instead of
        // treating mere presence as "true" ("--buildRandom false" must not build a model).
        boolean buildRandom = commandLine.hasOption(randomOption)
                && Boolean.parseBoolean((String) commandLine.getValue(randomOption));
        String scaling = (String) commandLine.getValue(scalingOption);

        int numHidden = Integer.parseInt((String) commandLine.getValue(stateNumberOption));
        int numObserved = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption));

        double convergenceDelta = Double.parseDouble((String) commandLine.getValue(epsilonOption));
        int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption));

        // The parser accepts a missing 'model' option, but the model path is used
        // unconditionally below; fail early with a message that names the option.
        if (modelIn == null) {
            throw new IllegalArgumentException(
                    "The 'model' option must be given: its path is used for the initial HmmModel");
        }

        if (getConf() == null) {
            setConf(new Configuration());
        }
        if (buildRandom) {
            BaumWelchUtils.buildRandomModel(numHidden, numObserved, new Path(modelIn), getConf());
        }
        run(getConf(), new Path(input), new Path(modelIn), new Path(output), new Path(hiddenStateToIdMap),
                new Path(emittedStateToIdMap), numHidden, numObserved, convergenceDelta, scaling,
                maxIterations);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
        // A usage error must not be reported as a successful run.
        return -1;
    }

    return 0;

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.RandomSequenceGenerator.java

/**
 * Command-line entry point: reads a serialized HMM, generates a sequence of observed states
 * from it and writes the sequence as space-separated integers.
 *
 * <p>Options: {@code --model} (required) path of the serialized model, {@code --length}
 * (required) number of observations to generate, {@code --output} (optional) file to write
 * to. When {@code --output} is omitted the sequence is written to standard output.
 * If the options cannot be parsed, the usage help is printed instead.
 *
 * @param args command-line arguments
 * @throws IOException if the model cannot be read or the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option outputOption = optionBuilder.withLongName("output")
            .withDescription("Output file with sequence of observed states").withShortName("o")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(false).create();

    Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model")
            .withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option lengthOption = optionBuilder.withLongName("length").withDescription("Length of generated sequence")
            .withShortName("l")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(outputOption).withOption(modelOption)
            .withOption(lengthOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        // null when --output was not supplied (the option is not required)
        String output = (String) commandLine.getValue(outputOption);

        String modelPath = (String) commandLine.getValue(modelOption);

        int length = Integer.parseInt((String) commandLine.getValue(lengthOption));

        //reading serialized HMM
        DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
        HmmModel model;
        try {
            model = LossyHmmSerializer.deserialize(modelStream);
        } finally {
            Closeables.close(modelStream, true);
        }

        //generating observations
        int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis());

        //writing output
        // new FileOutputStream((String) null) throws NullPointerException, so an omitted
        // --output is routed to standard output instead of crashing.
        boolean writeToStdout = output == null;
        PrintWriter writer = writeToStdout
                ? new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true)
                : new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
        try {
            for (int observation : observations) {
                writer.print(observation);
                writer.print(' ');
            }
        } finally {
            if (writeToStdout) {
                // System.out is not ours to close; just push the buffered text through.
                writer.flush();
            } else {
                Closeables.close(writer, false);
            }
        }
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }
}