List of usage examples for the org.apache.commons.cli2.commandline.Parser constructor Parser()
Parser
From source file:org.apache.mahout.classifier.df.BreimanExample.java
@Override public int run(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true) .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) .withDescription("Data path").create(); Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()) .withDescription("Dataset path").create(); Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true) .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create()) .withDescription("Number of trees to grow, each iteration").create(); Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true) .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create()) .withDescription("Number of times to repeat the test").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create();//from w w w . 
j ava2 s .c o m Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt) .withOption(nbtreesOpt).withOption(helpOpt).create(); Path dataPath; Path datasetPath; int nbTrees; int nbIterations; try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption("help")) { CommandLineUtil.printHelp(group); return -1; } String dataName = cmdLine.getValue(dataOpt).toString(); String datasetName = cmdLine.getValue(datasetOpt).toString(); nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString()); nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString()); dataPath = new Path(dataName); datasetPath = new Path(datasetName); } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); return -1; } // load the data FileSystem fs = dataPath.getFileSystem(new Configuration()); Dataset dataset = Dataset.load(getConf(), datasetPath); Data data = DataLoader.loadData(dataset, fs, dataPath); // take m to be the first integer less than log2(M) + 1, where M is the // number of inputs int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1); Random rng = RandomUtils.getRandom(); for (int iteration = 0; iteration < nbIterations; iteration++) { log.info("Iteration {}", iteration); runIteration(rng, data, m, nbTrees); } log.info("********************************************"); log.info("Random Input Test Error : {}", sumTestErrM / nbIterations); log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations); log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations)); log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations)); log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations); log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations); return 0; }
From source file:org.apache.mahout.classifier.df.mapreduce.Resampling.java
/**
 * Parses the command line into this tool's fields and runs the selected resampling technique.
 *
 * @param args command-line arguments; {@code --resampling} must be one of {@code overs},
 *             {@code unders} or {@code smote} (case-insensitive), {@code --time} is optional
 * @return 0 after the selected technique ran, -1 if help was requested or the arguments
 *         could not be used
 */
public int run(String[] args) throws Exception, ClassNotFoundException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();

  Option dataPreprocessingOpt = obuilder.withLongName("dataPreprocessing").withShortName("dp")
      .withRequired(true).withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data Preprocessing path").create();

  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();

  Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Time path").create();

  Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help")
      .create();

  Option resamplingOpt = obuilder.withLongName("resampling").withShortName("rs").withRequired(true)
      .withArgument(abuilder.withName("resampling").withMinimum(1).withMaximum(1).create())
      .withDescription(
          "The resampling technique (oversampling (overs), undersampling (unders) or SMOTE (smote))")
      .create();

  Option nbpartitionsOpt = obuilder.withLongName("nbpartitions").withShortName("p").withRequired(true)
      .withArgument(abuilder.withName("nbpartitions").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of partitions").create();

  Option nposOpt = obuilder.withLongName("npos").withShortName("npos").withRequired(true)
      .withArgument(abuilder.withName("npos").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of instances of the positive class").create();

  Option nnegOpt = obuilder.withLongName("nneg").withShortName("nneg").withRequired(true)
      .withArgument(abuilder.withName("nneg").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of instances of the negative class").create();

  Option negclassOpt = obuilder.withLongName("negclass").withShortName("negclass").withRequired(true)
      .withArgument(abuilder.withName("negclass").withMinimum(1).withMaximum(1).create())
      .withDescription("Name of the negative class").create();

  Option posclassOpt = obuilder.withLongName("posclass").withShortName("posclass").withRequired(true)
      .withArgument(abuilder.withName("posclass").withMinimum(1).withMaximum(1).create())
      .withDescription("Name of the positive class").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
      .withOption(helpOpt).withOption(resamplingOpt).withOption(dataPreprocessingOpt)
      .withOption(nbpartitionsOpt).withOption(nposOpt).withOption(nnegOpt).withOption(negclassOpt)
      .withOption(posclassOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    dataPreprocessing = cmdLine.getValue(dataPreprocessingOpt).toString();
    String resampling = cmdLine.getValue(resamplingOpt).toString();
    partitions = Integer.parseInt(cmdLine.getValue(nbpartitionsOpt).toString());
    npos = Integer.parseInt(cmdLine.getValue(nposOpt).toString());
    nneg = Integer.parseInt(cmdLine.getValue(nnegOpt).toString());
    negclass = cmdLine.getValue(negclassOpt).toString();
    posclass = cmdLine.getValue(posclassOpt).toString();

    if (resampling.equalsIgnoreCase("overs")) {
      withOversampling = true;
    } else if (resampling.equalsIgnoreCase("unders")) {
      withUndersampling = true;
    } else if (resampling.equalsIgnoreCase("smote")) {
      withSmote = true;
    } else {
      // previously an unknown technique fell through, ran nothing and still reported success
      log.error("Unknown resampling technique: {}", resampling);
      CommandLineUtil.printHelp(group);
      return -1;
    }

    if (cmdLine.hasOption(timeOpt)) {
      preprocessingTimeIsStored = true;
      timeName = cmdLine.getValue(timeOpt).toString();
    }

    if (log.isDebugEnabled()) {
      log.debug("data : {}", dataName);
      log.debug("dataset : {}", datasetName);
      log.debug("time : {}", timeName);
      log.debug("Oversampling : {}", withOversampling);
      log.debug("Undersampling : {}", withUndersampling);
      log.debug("SMOTE : {}", withSmote);
    }

    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
    dataPreprocessingPath = new Path(dataPreprocessing);
    if (preprocessingTimeIsStored) {
      timePath = new Path(timeName);
    }
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
    return -1;
  } catch (NumberFormatException e) {
    // a non-numeric count is a usage error, not a crash
    log.error("nbpartitions, npos and nneg must be integers", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  if (withOversampling) {
    overSampling();
  } else if (withUndersampling) {
    underSampling();
  } else if (withSmote) {
    smote();
  }

  return 0;
}
From source file:org.apache.mahout.classifier.df.tools.ForestVisualizer.java
/**
 * Command-line entry point. Reads the dataset path, the model path and an optional list of
 * attribute names, then hands them to {@code print}. Any exception is logged and followed by
 * the usage text.
 *
 * @param args command-line arguments; {@code --dataset} and {@code --model} are required,
 *             {@code --names} is optional, {@code --help} prints the usage
 */
public static void main(String[] args) {
  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();
  GroupBuilder groupBuilder = new GroupBuilder();

  Option datasetOpt = optionBuilder
      .withLongName("dataset")
      .withShortName("ds")
      .withRequired(true)
      .withArgument(argumentBuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path")
      .create();

  Option modelOpt = optionBuilder
      .withLongName("model")
      .withShortName("m")
      .withRequired(true)
      .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Path to the Decision Forest")
      .create();

  Option attrNamesOpt = optionBuilder
      .withLongName("names")
      .withShortName("n")
      .withRequired(false)
      .withArgument(argumentBuilder.withName("names").withMinimum(1).create())
      .withDescription("Optional, Attribute names")
      .create();

  Option helpOpt = optionBuilder
      .withLongName("help")
      .withShortName("h")
      .withDescription("Print out help")
      .create();

  Group options = groupBuilder
      .withName("Options")
      .withOption(datasetOpt)
      .withOption(modelOpt)
      .withOption(attrNamesOpt)
      .withOption(helpOpt)
      .create();

  try {
    Parser cliParser = new Parser();
    cliParser.setGroup(options);
    CommandLine parsed = cliParser.parse(args);

    if (parsed.hasOption("help")) {
      CommandLineUtil.printHelp(options);
      return;
    }

    String datasetName = parsed.getValue(datasetOpt).toString();
    String modelName = parsed.getValue(modelOpt).toString();

    // stays null unless at least one attribute name was supplied
    String[] attrNames = null;
    if (parsed.hasOption(attrNamesOpt)) {
      Collection<String> supplied = (Collection<String>) parsed.getValues(attrNamesOpt);
      if (!supplied.isEmpty()) {
        attrNames = supplied.toArray(new String[supplied.size()]);
      }
    }

    print(modelName, datasetName, attrNames);
  } catch (Exception e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(options);
  }
}
From source file:org.apache.mahout.classifier.df.tools.Frequencies.java
/**
 * Parses the data and dataset paths from the command line and passes them to {@code runTool}.
 *
 * @param args command-line arguments; {@code --data} and {@code --dataset} are required,
 *             {@code --help} prints the usage
 * @return 0 after the tool ran or help was printed, -1 if the options could not be parsed
 */
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();

  // only one dataset path is read below, so cap the argument at one value like the data option
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("dataset path").create();

  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
      .create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(helpOpt)
      .create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return 0;
    }

    String dataPath = cmdLine.getValue(dataOpt).toString();
    String datasetPath = cmdLine.getValue(datasetOpt).toString();

    log.debug("Data path : {}", dataPath);
    log.debug("Dataset path : {}", datasetPath);

    runTool(dataPath, datasetPath);
  } catch (OptionException e) {
    log.warn(e.toString(), e);
    CommandLineUtil.printHelp(group);
    // invalid arguments are a failure; do not report success to the caller
    return -1;
  }

  return 0;
}
From source file:org.apache.mahout.classifier.df.tools.UDistrib.java
/**
 * Launch the uniform distribution tool. Requires the following command line arguments:
 * <ul>
 * <li>data : data path</li>
 * <li>dataset : dataset path</li>
 * <li>numpartitions : num partitions</li>
 * <li>output : output path</li>
 * </ul>
 * Invalid arguments are logged and followed by the usage text.
 *
 * @param args command-line arguments as listed above; {@code --help} prints the usage
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("data").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();

  // only one dataset path is read below, so cap the argument at one value
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();

  Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
      .withDescription("Path to generated files").create();

  // was withMinimum(1).withMinimum(1): the second call was meant to be the upper bound
  Option partitionsOpt = obuilder.withLongName("numpartitions").withShortName("p").withRequired(true)
      .withArgument(abuilder.withName("numparts").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of partitions to create").create();

  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
      .create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(outputOpt).withOption(datasetOpt)
      .withOption(partitionsOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }

    String data = cmdLine.getValue(dataOpt).toString();
    String dataset = cmdLine.getValue(datasetOpt).toString();
    int numPartitions = Integer.parseInt(cmdLine.getValue(partitionsOpt).toString());
    String output = cmdLine.getValue(outputOpt).toString();

    runTool(data, dataset, output, numPartitions);
  } catch (OptionException e) {
    log.warn(e.toString(), e);
    CommandLineUtil.printHelp(group);
  } catch (NumberFormatException e) {
    // a non-numeric partition count is a usage error, not a crash
    log.warn(e.toString(), e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.classifier.mlp.RunMultilayerPerceptron.java
/** * Parse the arguments.// w w w. j a va 2 s. c o m * * @param args The input arguments. * @param parameters The parameters need to be filled. * @return true or false * @throws Exception */ private static boolean parseArgs(String[] args, Parameters parameters) throws Exception { // build the options log.info("Validate and parse arguments..."); DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); GroupBuilder groupBuilder = new GroupBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputFileFormatOption = optionBuilder .withLongName("format").withShortName("f").withArgument(argumentBuilder.withName("file type") .withDefault("csv").withMinimum(1).withMaximum(1).create()) .withDescription("type of input file, currently support 'csv'").create(); List<Integer> columnRangeDefault = Lists.newArrayList(); columnRangeDefault.add(0); columnRangeDefault.add(Integer.MAX_VALUE); Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").withRequired(false) .withDescription("whether to skip the first row of the input file").create(); Option inputColumnRangeOption = optionBuilder.withLongName("columnRange").withShortName("cr") .withDescription("the column range of the input file, start from 0").withArgument(argumentBuilder .withName("range").withMinimum(2).withMaximum(2).withDefaults(columnRangeDefault).create()) .create(); Group inputFileTypeGroup = groupBuilder.withOption(skipHeaderOption).withOption(inputColumnRangeOption) .withOption(inputFileFormatOption).create(); Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true) .withArgument(argumentBuilder.withName("file path").withMinimum(1).withMaximum(1).create()) .withDescription("the file path of unlabelled dataset").withChildren(inputFileTypeGroup).create(); Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true) .withArgument(argumentBuilder.withName("model 
file").withMinimum(1).withMaximum(1).create()) .withDescription("the file path of the model").create(); Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels") .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create()) .withDescription("an ordered list of label names").create(); Group labelsGroup = groupBuilder.withOption(labelsOption).create(); Option outputOption = optionBuilder.withLongName("output").withShortName("o").withRequired(true) .withArgument( argumentBuilder.withConsumeRemaining("file path").withMinimum(1).withMaximum(1).create()) .withDescription("the file path of labelled results").withChildren(labelsGroup).create(); // parse the input Parser parser = new Parser(); Group normalOption = groupBuilder.withOption(inputOption).withOption(modelOption).withOption(outputOption) .create(); parser.setGroup(normalOption); CommandLine commandLine = parser.parseAndHelp(args); if (commandLine == null) { return false; } // obtain the arguments parameters.inputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, inputOption); parameters.inputFileFormat = TrainMultilayerPerceptron.getString(commandLine, inputFileFormatOption); parameters.skipHeader = commandLine.hasOption(skipHeaderOption); parameters.modelFilePathStr = TrainMultilayerPerceptron.getString(commandLine, modelOption); parameters.outputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, outputOption); List<?> columnRange = commandLine.getValues(inputColumnRangeOption); parameters.columnStart = Integer.parseInt(columnRange.get(0).toString()); parameters.columnEnd = Integer.parseInt(columnRange.get(1).toString()); return true; }
From source file:org.apache.mahout.classifier.mlp.TrainMultilayerPerceptron.java
/** * Parse the input arguments.//from w w w . ja v a 2 s .com * * @param args The input arguments * @param parameters The parameters parsed. * @return Whether the input arguments are valid. * @throws Exception */ private static boolean parseArgs(String[] args, Parameters parameters) throws Exception { // build the options log.info("Validate and parse arguments..."); DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); GroupBuilder groupBuilder = new GroupBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); // whether skip the first row of the input file Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").create(); Group skipHeaderGroup = groupBuilder.withOption(skipHeaderOption).create(); Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true) .withChildren(skipHeaderGroup) .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create()) .withDescription("the file path of training dataset").create(); Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels").withRequired(true) .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create()) .withDescription("label names").create(); Option updateOption = optionBuilder.withLongName("update").withShortName("u") .withDescription("whether to incrementally update model if the model exists").create(); Group modelUpdateGroup = groupBuilder.withOption(updateOption).create(); Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true) .withArgument(argumentBuilder.withName("model-path").withMinimum(1).withMaximum(1).create()) .withDescription("the path to store the trained model").withChildren(modelUpdateGroup).create(); Option layerSizeOption = optionBuilder.withLongName("layerSize").withShortName("ls").withRequired(true) .withArgument(argumentBuilder.withName("size of layer").withMinimum(2).withMaximum(5).create()) 
.withDescription("the size of each layer").create(); Option squashingFunctionOption = optionBuilder.withLongName("squashingFunction").withShortName("sf") .withArgument(argumentBuilder.withName("squashing function").withMinimum(1).withMaximum(1) .withDefault("Sigmoid").create()) .withDescription("the name of squashing function (currently only supports Sigmoid)").create(); Option learningRateOption = optionBuilder.withLongName("learningRate").withShortName("l") .withArgument(argumentBuilder.withName("learning rate").withMaximum(1).withMinimum(1) .withDefault(NeuralNetwork.DEFAULT_LEARNING_RATE).create()) .withDescription("learning rate").create(); Option momemtumOption = optionBuilder.withLongName("momemtumWeight").withShortName("m") .withArgument(argumentBuilder.withName("momemtum weight").withMaximum(1).withMinimum(1) .withDefault(NeuralNetwork.DEFAULT_MOMENTUM_WEIGHT).create()) .withDescription("momemtum weight").create(); Option regularizationOption = optionBuilder.withLongName("regularizationWeight").withShortName("r") .withArgument(argumentBuilder.withName("regularization weight").withMaximum(1).withMinimum(1) .withDefault(NeuralNetwork.DEFAULT_REGULARIZATION_WEIGHT).create()) .withDescription("regularization weight").create(); // parse the input Parser parser = new Parser(); Group normalOptions = groupBuilder.withOption(inputOption).withOption(skipHeaderOption) .withOption(updateOption).withOption(labelsOption).withOption(modelOption) .withOption(layerSizeOption).withOption(squashingFunctionOption).withOption(learningRateOption) .withOption(momemtumOption).withOption(regularizationOption).create(); parser.setGroup(normalOptions); CommandLine commandLine = parser.parseAndHelp(args); if (commandLine == null) { return false; } parameters.learningRate = getDouble(commandLine, learningRateOption); parameters.momemtumWeight = getDouble(commandLine, momemtumOption); parameters.regularizationWeight = getDouble(commandLine, regularizationOption); 
parameters.inputFilePath = getString(commandLine, inputOption); parameters.skipHeader = commandLine.hasOption(skipHeaderOption); List<String> labelsList = getStringList(commandLine, labelsOption); int currentIndex = 0; for (String label : labelsList) { parameters.labelsIndex.put(label, currentIndex++); } parameters.modelFilePath = getString(commandLine, modelOption); parameters.updateModel = commandLine.hasOption(updateOption); parameters.layerSizeList = getIntegerList(commandLine, layerSizeOption); parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption); System.out.printf( "Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f," + " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath, parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()), parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight, parameters.regularizationWeight); return true; }
From source file:org.apache.mahout.classifier.sequencelearning.hmm.BaumWelchTrainer.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option inputOption = DefaultOptionCreator.inputOption().create(); Option outputOption = DefaultOptionCreator.outputOption().create(); Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates") .withDescription("Number of hidden states").withShortName("nh") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create()) .withRequired(true).create(); Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates") .withDescription("Number of observed states").withShortName("no") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create()) .withRequired(true).create(); Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold") .withShortName("e") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create()) .withRequired(true).create(); Option iterationsOption = optionBuilder.withLongName("max-iterations") .withDescription("Maximum iterations number").withShortName("m") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create()) .withRequired(true).create(); Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption) .withOption(stateNumberOption).withOption(observedStateNumberOption).withOption(epsilonOption) .withOption(iterationsOption).withName("Options").create(); try {//from ww w .ja v a 2 s .c om Parser parser = new Parser(); parser.setGroup(optionGroup); CommandLine commandLine = parser.parse(args); String input = (String) commandLine.getValue(inputOption); String output = (String) commandLine.getValue(outputOption); int nrOfHiddenStates = Integer.parseInt((String) commandLine.getValue(stateNumberOption)); int nrOfObservedStates = Integer.parseInt((String) 
commandLine.getValue(observedStateNumberOption)); double epsilon = Double.parseDouble((String) commandLine.getValue(epsilonOption)); int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption)); //constructing random-generated HMM HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime()); List<Integer> observations = Lists.newArrayList(); //reading observations Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8"); try { while (scanner.hasNextInt()) { observations.add(scanner.nextInt()); } } finally { scanner.close(); } int[] observationsArray = new int[observations.size()]; for (int i = 0; i < observations.size(); ++i) { observationsArray[i] = observations.get(i); } //training HmmModel trainedModel = HmmTrainer.trainBaumWelch(model, observationsArray, epsilon, maxIterations, true); //serializing trained model DataOutputStream stream = new DataOutputStream(new FileOutputStream(output)); try { LossyHmmSerializer.serialize(trainedModel, stream); } finally { Closeables.close(stream, false); } //printing tranied model System.out.println("Initial probabilities: "); for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { System.out.print(i + " "); } System.out.println(); for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { System.out.print(trainedModel.getInitialProbabilities().get(i) + " "); } System.out.println(); System.out.println("Transition matrix:"); System.out.print(" "); for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { System.out.print(i + " "); } System.out.println(); for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { System.out.print(i + " "); for (int j = 0; j < trainedModel.getNrOfHiddenStates(); ++j) { System.out.print(trainedModel.getTransitionMatrix().get(i, j) + " "); } System.out.println(); } System.out.println("Emission matrix: "); System.out.print(" "); for (int i = 0; i < trainedModel.getNrOfOutputStates(); ++i) { System.out.print(i + " 
"); } System.out.println(); for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { System.out.print(i + " "); for (int j = 0; j < trainedModel.getNrOfOutputStates(); ++j) { System.out.print(trainedModel.getEmissionMatrix().get(i, j) + " "); } System.out.println(); } } catch (OptionException e) { CommandLineUtil.printHelp(optionGroup); } }
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchDriver.java
/**
 * Parses the command line and launches the training run via the multi-argument
 * {@code run(...)} overload, optionally building a random initial model first.
 *
 * @param args command-line arguments; every option except {@code --buildRandom} is required
 * @return 0 after the training run was started and returned, -1 if the options could not be
 *         parsed
 * @throws Exception propagated from building the random model or from the training run
 */
@Override
public int run(String[] args) throws Exception {
  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option inputOption = optionBuilder.withLongName("input")
      .withDescription("Sequence file containing VectorWritables as training sequence").withShortName("i")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
      .withRequired(true).create();

  Option outputOption = optionBuilder.withLongName("output")
      .withDescription("Output path to store the trained model encoded as Sequence Files")
      .withShortName("o")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
      .withRequired(true).create();

  // the model path is always turned into a Path below (either as the initial model or as the
  // place the random model is written to), so it must be present
  Option modelOption = optionBuilder.withLongName("model")
      .withDescription("Initial HmmModel encoded as a Sequence File. "
          + "Will be constructed with a random distribution if the 'buildRandom' option is set to true.")
      .withShortName("im")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
      .withRequired(true).create();

  Option hiddenStateMapPath = optionBuilder.withLongName("hiddenStateToIDMap")
      .withDescription("Hidden states to ID map path.").withShortName("hmap")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
      .withRequired(true).create();

  Option emitStateMapPath = optionBuilder.withLongName("emittedStateToIDMap")
      .withDescription("Emitted states to ID map path.").withShortName("smap")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
      .withRequired(true).create();

  Option randomOption = optionBuilder.withLongName("buildRandom")
      .withDescription(
          "Optional argument to generate a random initial HmmModel and store it in 'model' directory")
      .withShortName("r")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("boolean").create())
      .withRequired(false).create();

  // NOTE(review): described as optional but declared required; left as-is because the value is
  // passed straight to run(...) and a missing value's handling is not visible here
  Option scalingOption = optionBuilder.withLongName("Scaling")
      .withDescription("Optional argument to invoke scaled training").withShortName("l")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("string").create())
      .withRequired(true).create();

  Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates")
      .withDescription("Number of hidden states").withShortName("nh")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
      .withRequired(true).create();

  Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates")
      .withDescription("Number of observed states").withShortName("no")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
      .withRequired(true).create();

  Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold")
      .withShortName("e")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
      .withRequired(true).create();

  Option iterationsOption = optionBuilder.withLongName("maxIterations")
      .withDescription("Maximum iterations number").withShortName("m")
      .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
      .withRequired(true).create();

  Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
      .withOption(modelOption).withOption(hiddenStateMapPath).withOption(emitStateMapPath)
      .withOption(randomOption).withOption(scalingOption).withOption(stateNumberOption)
      .withOption(observedStateNumberOption).withOption(epsilonOption).withOption(iterationsOption)
      .withName("Options").create();

  try {
    Parser parser = new Parser();
    parser.setGroup(optionGroup);
    CommandLine commandLine = parser.parse(args);

    String input = (String) commandLine.getValue(inputOption);
    String output = (String) commandLine.getValue(outputOption);
    String modelIn = (String) commandLine.getValue(modelOption);
    String hiddenStateToIdMap = (String) commandLine.getValue(hiddenStateMapPath);
    String emittedStateToIdMap = (String) commandLine.getValue(emitStateMapPath);

    // honour the boolean value of --buildRandom instead of its mere presence,
    // so that "-r false" no longer builds a random model
    boolean buildRandom = commandLine.hasOption(randomOption)
        && Boolean.parseBoolean((String) commandLine.getValue(randomOption));

    String scaling = (String) commandLine.getValue(scalingOption);

    int numHidden = Integer.parseInt((String) commandLine.getValue(stateNumberOption));
    int numObserved = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption));

    double convergenceDelta = Double.parseDouble((String) commandLine.getValue(epsilonOption));
    int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption));

    if (getConf() == null) {
      setConf(new Configuration());
    }

    if (buildRandom) {
      BaumWelchUtils.buildRandomModel(numHidden, numObserved, new Path(modelIn), getConf());
    }

    run(getConf(), new Path(input), new Path(modelIn), new Path(output), new Path(hiddenStateToIdMap),
        new Path(emittedStateToIdMap), numHidden, numObserved, convergenceDelta, scaling, maxIterations);
  } catch (OptionException e) {
    // the parser's message says which option was missing or malformed
    System.err.println(e.getMessage());
    CommandLineUtil.printHelp(optionGroup);
    return -1;
  }

  return 0;
}
From source file:org.apache.mahout.classifier.sequencelearning.hmm.RandomSequenceGenerator.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); ArgumentBuilder argumentBuilder = new ArgumentBuilder(); Option outputOption = optionBuilder.withLongName("output") .withDescription("Output file with sequence of observed states").withShortName("o") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create()) .withRequired(false).create(); Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model") .withShortName("m") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create()) .withRequired(true).create(); Option lengthOption = optionBuilder.withLongName("length").withDescription("Length of generated sequence") .withShortName("l") .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create()) .withRequired(true).create(); Group optionGroup = new GroupBuilder().withOption(outputOption).withOption(modelOption) .withOption(lengthOption).withName("Options").create(); try {//from w w w . j a v a 2 s. 
c o m Parser parser = new Parser(); parser.setGroup(optionGroup); CommandLine commandLine = parser.parse(args); String output = (String) commandLine.getValue(outputOption); String modelPath = (String) commandLine.getValue(modelOption); int length = Integer.parseInt((String) commandLine.getValue(lengthOption)); //reading serialized HMM DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); HmmModel model; try { model = LossyHmmSerializer.deserialize(modelStream); } finally { Closeables.close(modelStream, true); } //generating observations int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis()); //writing output PrintWriter writer = new PrintWriter( new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true); try { for (int observation : observations) { writer.print(observation); writer.print(' '); } } finally { Closeables.close(writer, false); } } catch (OptionException e) { CommandLineUtil.printHelp(optionGroup); } }