List of usage examples for org.apache.commons.cli2.commandline Parser parse
public CommandLine parse(final String[] arguments) throws OptionException
From source file:org.apache.mahout.classifier.df.BreimanExample.java
/**
 * Command-line entry point for the Breiman example: parses CLI options,
 * loads the data/dataset, then repeatedly runs the comparison iteration.
 *
 * @param args command-line arguments: -d data path, -ds dataset path,
 *             -t trees per iteration, -i number of iterations, -h help
 * @return 0 on success, -1 on help request or option-parsing failure
 * @throws IOException if the data or dataset cannot be read
 */
@Override
public int run(String[] args) throws IOException {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
            .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of trees to grow, each iteration").create();

    Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
            .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of times to repeat the test").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
            .withOption(nbtreesOpt).withOption(helpOpt).create();

    Path dataPath;
    Path datasetPath;
    int nbTrees;
    int nbIterations;

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
        nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1);

    Random rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < nbIterations; iteration++) {
        log.info("Iteration {}", iteration);
        runIteration(rng, data, m, nbTrees);
    }

    // Summary statistics; the sum* / numNodes* accumulators are presumably
    // instance fields updated by runIteration() — not visible here, confirm.
    log.info("********************************************");
    log.info("Random Input Test Error : {}", sumTestErrM / nbIterations);
    log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations);
    log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
    log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
    log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
    log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

    return 0;
}
From source file:org.apache.mahout.classifier.df.mapreduce.Resampling.java
/**
 * Parses CLI options for the resampling tool and dispatches to one of the
 * resampling strategies: oversampling, undersampling, or SMOTE.
 *
 * NOTE(review): most parsed values (dataName, partitions, npos, nneg, ...)
 * are assigned to instance fields declared elsewhere in the class — not
 * visible in this excerpt.
 *
 * @param args command-line arguments (see option descriptions below)
 * @return 0 on success, -1 on help request or option-parsing failure
 * @throws Exception propagated from the selected resampling method
 */
public int run(String[] args) throws Exception, ClassNotFoundException, InterruptedException {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option dataPreprocessingOpt = obuilder.withLongName("dataPreprocessing").withShortName("dp")
            .withRequired(true).withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data Preprocessing path").create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Time path").create();

    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help")
            .create();

    Option resamplingOpt = obuilder.withLongName("resampling").withShortName("rs").withRequired(true)
            .withArgument(abuilder.withName("resampling").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The resampling technique (oversampling (overs), undersampling (unders) or SMOTE (smote))")
            .create();

    Option nbpartitionsOpt = obuilder.withLongName("nbpartitions").withShortName("p").withRequired(true)
            .withArgument(abuilder.withName("nbpartitions").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of partitions").create();

    Option nposOpt = obuilder.withLongName("npos").withShortName("npos").withRequired(true)
            .withArgument(abuilder.withName("npos").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of instances of the positive class").create();

    Option nnegOpt = obuilder.withLongName("nneg").withShortName("nneg").withRequired(true)
            .withArgument(abuilder.withName("nneg").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of instances of the negative class").create();

    Option negclassOpt = obuilder.withLongName("negclass").withShortName("negclass").withRequired(true)
            .withArgument(abuilder.withName("negclass").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the negative class").create();

    Option posclassOpt = obuilder.withLongName("posclass").withShortName("posclass").withRequired(true)
            .withArgument(abuilder.withName("posclass").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the positive class").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
            .withOption(helpOpt).withOption(resamplingOpt).withOption(dataPreprocessingOpt)
            .withOption(nbpartitionsOpt).withOption(nposOpt).withOption(nnegOpt).withOption(negclassOpt)
            .withOption(posclassOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        dataPreprocessing = cmdLine.getValue(dataPreprocessingOpt).toString();
        String resampling = cmdLine.getValue(resamplingOpt).toString();
        partitions = Integer.parseInt(cmdLine.getValue(nbpartitionsOpt).toString());
        npos = Integer.parseInt(cmdLine.getValue(nposOpt).toString());
        nneg = Integer.parseInt(cmdLine.getValue(nnegOpt).toString());
        negclass = cmdLine.getValue(negclassOpt).toString();
        posclass = cmdLine.getValue(posclassOpt).toString();

        // Select exactly one resampling strategy; an unrecognized value
        // leaves all three flags false and the method falls through to
        // "return 0" without doing any work.
        if (resampling.equalsIgnoreCase("overs")) {
            withOversampling = true;
        } else if (resampling.equalsIgnoreCase("unders")) {
            withUndersampling = true;
        } else if (resampling.equalsIgnoreCase("smote")) {
            withSmote = true;
        }

        if (cmdLine.hasOption(timeOpt)) {
            preprocessingTimeIsStored = true;
            timeName = cmdLine.getValue(timeOpt).toString();
        }

        if (log.isDebugEnabled()) {
            log.debug("data : {}", dataName);
            log.debug("dataset : {}", datasetName);
            log.debug("time : {}", timeName);
            log.debug("Oversampling : {}", withOversampling);
            log.debug("Undersampling : {}", withUndersampling);
            log.debug("SMOTE : {}", withSmote);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        dataPreprocessingPath = new Path(dataPreprocessing);
        if (preprocessingTimeIsStored)
            timePath = new Path(timeName);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    // Dispatch to the configured resampling implementation.
    if (withOversampling) {
        overSampling();
    } else if (withUndersampling) {
        underSampling();
    } else if (withSmote) {
        smote();
    }

    return 0;
}
From source file:org.apache.mahout.classifier.df.tools.ForestVisualizer.java
public static void main(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()) .withDescription("Dataset path").create(); Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true) .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) .withDescription("Path to the Decision Forest").create(); Option attrNamesOpt = obuilder.withLongName("names").withShortName("n").withRequired(false) .withArgument(abuilder.withName("names").withMinimum(1).create()) .withDescription("Optional, Attribute names").create(); Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help") .create();/* w w w . java 2 s . c om*/ Group group = gbuilder.withName("Options").withOption(datasetOpt).withOption(modelOpt) .withOption(attrNamesOpt).withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption("help")) { CommandLineUtil.printHelp(group); return; } String datasetName = cmdLine.getValue(datasetOpt).toString(); String modelName = cmdLine.getValue(modelOpt).toString(); String[] attrNames = null; if (cmdLine.hasOption(attrNamesOpt)) { Collection<String> names = (Collection<String>) cmdLine.getValues(attrNamesOpt); if (!names.isEmpty()) { attrNames = new String[names.size()]; names.toArray(attrNames); } } print(modelName, datasetName, attrNames); } catch (Exception e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.df.tools.Frequencies.java
@Override public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true) .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) .withDescription("Data path").create(); Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) .withArgument(abuilder.withName("path").withMinimum(1).create()).withDescription("dataset path") .create();//from w ww.j a va 2 s. co m Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(helpOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return 0; } String dataPath = cmdLine.getValue(dataOpt).toString(); String datasetPath = cmdLine.getValue(datasetOpt).toString(); log.debug("Data path : {}", dataPath); log.debug("Dataset path : {}", datasetPath); runTool(dataPath, datasetPath); } catch (OptionException e) { log.warn(e.toString(), e); CommandLineUtil.printHelp(group); } return 0; }
From source file:org.apache.mahout.classifier.df.tools.UDistrib.java
/** * Launch the uniform distribution tool. Requires the following command line arguments:<br> * /* ww w.j a va 2 s .co m*/ * data : data path dataset : dataset path numpartitions : num partitions output : output path * * @throws java.io.IOException */ public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true) .withArgument(abuilder.withName("data").withMinimum(1).withMaximum(1).create()) .withDescription("Data path").create(); Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) .withArgument(abuilder.withName("dataset").withMinimum(1).create()).withDescription("Dataset path") .create(); Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Path to generated files").create(); Option partitionsOpt = obuilder.withLongName("numpartitions").withShortName("p").withRequired(true) .withArgument(abuilder.withName("numparts").withMinimum(1).withMinimum(1).create()) .withDescription("Number of partitions to create").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(outputOpt).withOption(datasetOpt) .withOption(partitionsOpt).withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String data = cmdLine.getValue(dataOpt).toString(); String dataset = cmdLine.getValue(datasetOpt).toString(); int numPartitions = Integer.parseInt(cmdLine.getValue(partitionsOpt).toString()); String 
output = cmdLine.getValue(outputOpt).toString(); runTool(data, dataset, output, numPartitions); } catch (OptionException e) { log.warn(e.toString(), e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.sequencelearning.hmm.BaumWelchTrainer.java
/**
 * CLI entry point: trains an HMM with Baum-Welch on a sequence of observed
 * states read from a text file, serializes the trained model, and prints its
 * initial probabilities, transition matrix, and emission matrix to stdout.
 *
 * @param args -i input file of whitespace-separated ints, -o output model
 *             file, -nh hidden-state count, -no observed-state count,
 *             -e convergence epsilon, -m max iterations
 * @throws IOException if reading the observations or writing the model fails
 */
public static void main(String[] args) throws IOException {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = DefaultOptionCreator.inputOption().create();
    Option outputOption = DefaultOptionCreator.outputOption().create();

    Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates")
            .withDescription("Number of hidden states").withShortName("nh")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates")
            .withDescription("Number of observed states").withShortName("no")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold")
            .withShortName("e")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option iterationsOption = optionBuilder.withLongName("max-iterations")
            .withDescription("Maximum iterations number").withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(stateNumberOption).withOption(observedStateNumberOption).withOption(epsilonOption)
            .withOption(iterationsOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);
        int nrOfHiddenStates = Integer.parseInt((String) commandLine.getValue(stateNumberOption));
        int nrOfObservedStates = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption));
        double epsilon = Double.parseDouble((String) commandLine.getValue(epsilonOption));
        int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption));

        //constructing random-generated HMM
        // seeded with current time, so each run starts from a different model
        HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime());
        List<Integer> observations = Lists.newArrayList();

        //reading observations
        Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8");
        try {
            while (scanner.hasNextInt()) {
                observations.add(scanner.nextInt());
            }
        } finally {
            scanner.close();
        }

        int[] observationsArray = new int[observations.size()];
        for (int i = 0; i < observations.size(); ++i) {
            observationsArray[i] = observations.get(i);
        }

        //training
        HmmModel trainedModel = HmmTrainer.trainBaumWelch(model, observationsArray, epsilon, maxIterations,
                true);

        //serializing trained model
        DataOutputStream stream = new DataOutputStream(new FileOutputStream(output));
        try {
            LossyHmmSerializer.serialize(trainedModel, stream);
        } finally {
            Closeables.close(stream, false);
        }

        //printing trained model
        System.out.println("Initial probabilities: ");
        for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) {
            System.out.print(i + " ");
        }
        System.out.println();
        for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) {
            System.out.print(trainedModel.getInitialProbabilities().get(i) + " ");
        }
        System.out.println();

        System.out.println("Transition matrix:");
        System.out.print("  ");
        for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) {
            System.out.print(i + " ");
        }
        System.out.println();
        for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) {
            System.out.print(i + " ");
            for (int j = 0; j < trainedModel.getNrOfHiddenStates(); ++j) {
                System.out.print(trainedModel.getTransitionMatrix().get(i, j) + " ");
            }
            System.out.println();
        }

        System.out.println("Emission matrix: ");
        System.out.print("  ");
        for (int i = 0; i < trainedModel.getNrOfOutputStates(); ++i) {
            System.out.print(i + " ");
        }
        System.out.println();
        for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) {
            System.out.print(i + " ");
            for (int j = 0; j < trainedModel.getNrOfOutputStates(); ++j) {
                System.out.print(trainedModel.getEmissionMatrix().get(i, j) + " ");
            }
            System.out.println();
        }
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }
}
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchDriver.java
/**
 * Hadoop driver entry point for MapReduce Baum-Welch HMM training: parses
 * CLI options, optionally builds a random initial model, then delegates to
 * the static run(...) overload with the resolved paths and parameters.
 *
 * NOTE(review): the model option (-im) is not required, but modelIn is used
 * unconditionally below (new Path(modelIn)) — passing no model and no
 * buildRandom flag would presumably NPE; confirm against callers.
 *
 * @param args see option descriptions below
 * @return always 0 (option-parse failures print help but still return 0)
 * @throws Exception propagated from the underlying MapReduce run
 */
@Override
public int run(String[] args) throws Exception {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = optionBuilder.withLongName("input")
            .withDescription("Sequence file containing VectorWritables as training sequence").withShortName("i")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option outputOption = optionBuilder.withLongName("output")
            .withDescription("Output path to store the trained model encoded as Sequence Files")
            .withShortName("o")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option modelOption = optionBuilder.withLongName("model")
            .withDescription("Initial HmmModel encoded as a Sequence File. "
                    + "Will be constructed with a random distribution if the 'buildRandom' option is set to true.")
            .withShortName("im")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(false).create();

    Option hiddenStateMapPath = optionBuilder.withLongName("hiddenStateToIDMap")
            .withDescription("Hidden states to ID map path.").withShortName("hmap")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option emitStateMapPath = optionBuilder.withLongName("emittedStateToIDMap")
            .withDescription("Emitted states to ID map path.").withShortName("smap")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option randomOption = optionBuilder.withLongName("buildRandom")
            .withDescription(
                    "Optional argument to generate a random initial HmmModel and store it in 'model' directory")
            .withShortName("r")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("boolean").create())
            .withRequired(false).create();

    // NOTE(review): described as "Optional argument" but declared
    // withRequired(true) — inconsistent; confirm which is intended.
    Option scalingOption = optionBuilder.withLongName("Scaling")
            .withDescription("Optional argument to invoke scaled training").withShortName("l")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("string").create())
            .withRequired(true).create();

    Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates")
            .withDescription("Number of hidden states").withShortName("nh")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates")
            .withDescription("Number of observed states").withShortName("no")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option epsilonOption = optionBuilder.withLongName("epsilon").withDescription("Convergence threshold")
            .withShortName("e")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Option iterationsOption = optionBuilder.withLongName("maxIterations")
            .withDescription("Maximum iterations number").withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(modelOption).withOption(hiddenStateMapPath).withOption(emitStateMapPath)
            .withOption(randomOption).withOption(scalingOption).withOption(stateNumberOption)
            .withOption(observedStateNumberOption).withOption(epsilonOption).withOption(iterationsOption)
            .withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);
        String modelIn = (String) commandLine.getValue(modelOption);
        String hiddenStateToIdMap = (String) commandLine.getValue(hiddenStateMapPath);
        String emittedStateToIdMap = (String) commandLine.getValue(emitStateMapPath);
        // Note: only the option's presence is checked; its "boolean"
        // argument value is ignored.
        Boolean buildRandom = commandLine.hasOption(randomOption);
        String scaling = (String) commandLine.getValue(scalingOption);
        int numHidden = Integer.parseInt((String) commandLine.getValue(stateNumberOption));
        int numObserved = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption));
        double convergenceDelta = Double.parseDouble((String) commandLine.getValue(epsilonOption));
        int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption));

        if (getConf() == null) {
            setConf(new Configuration());
        }

        if (buildRandom) {
            BaumWelchUtils.buildRandomModel(numHidden, numObserved, new Path(modelIn), getConf());
        }

        run(getConf(), new Path(input), new Path(modelIn), new Path(output), new Path(hiddenStateToIdMap),
                new Path(emittedStateToIdMap), numHidden, numObserved, convergenceDelta, scaling, maxIterations);
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }

    return 0;
}
From source file:org.apache.mahout.classifier.sequencelearning.hmm.RandomSequenceGenerator.java
/**
 * CLI entry point: loads a serialized HMM and writes a randomly generated
 * sequence of observed states (space-separated ints) to the output file.
 *
 * @param args -m serialized model path (required), -l sequence length
 *             (required), -o output file path
 * @throws IOException if the model cannot be read or the output written
 */
public static void main(String[] args) throws IOException {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option outputOption = optionBuilder.withLongName("output")
            .withDescription("Output file with sequence of observed states").withShortName("o")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(false).create();

    Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model")
            .withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    Option lengthOption = optionBuilder.withLongName("length").withDescription("Length of generated sequence")
            .withShortName("l")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("number").create())
            .withRequired(true).create();

    Group optionGroup = new GroupBuilder().withOption(outputOption).withOption(modelOption)
            .withOption(lengthOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        // NOTE(review): outputOption is not required, but output is used
        // unconditionally below in new FileOutputStream(output) — confirm.
        String output = (String) commandLine.getValue(outputOption);
        String modelPath = (String) commandLine.getValue(modelOption);
        int length = Integer.parseInt((String) commandLine.getValue(lengthOption));

        //reading serialized HMM
        DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
        HmmModel model;
        try {
            model = LossyHmmSerializer.deserialize(modelStream);
        } finally {
            Closeables.close(modelStream, true);
        }

        //generating observations
        int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis());

        //writing output
        PrintWriter writer = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
        try {
            for (int observation : observations) {
                writer.print(observation);
                writer.print(' ');
            }
        } finally {
            Closeables.close(writer, false);
        }
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }
}
From source file:org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator.java
/**
 * CLI entry point: Viterbi-decodes an observed-state sequence with a
 * serialized HMM, writes the hidden-state sequence to the output file, and
 * optionally prints the sequence likelihood.
 *
 * @param args -i input file of observed states, -o output file, -m model
 *             path, -l flag to also compute the likelihood
 * @throws IOException if model/input cannot be read or output written
 */
public static void main(String[] args) throws IOException {
    // Build the CLI option group (commons-cli2 builders).
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputOption = DefaultOptionCreator.inputOption().create();
    Option outputOption = DefaultOptionCreator.outputOption().create();

    Option modelOption = optionBuilder.withLongName("model").withDescription("Path to serialized HMM model")
            .withShortName("m")
            .withArgument(argumentBuilder.withMaximum(1).withMinimum(1).withName("path").create())
            .withRequired(true).create();

    // Flag option (no argument): presence alone enables likelihood output.
    Option likelihoodOption = optionBuilder.withLongName("likelihood")
            .withDescription("Compute likelihood of observed sequence").withShortName("l").withRequired(false)
            .create();

    Group optionGroup = new GroupBuilder().withOption(inputOption).withOption(outputOption)
            .withOption(modelOption).withOption(likelihoodOption).withName("Options").create();

    try {
        Parser parser = new Parser();
        parser.setGroup(optionGroup);
        CommandLine commandLine = parser.parse(args);

        String input = (String) commandLine.getValue(inputOption);
        String output = (String) commandLine.getValue(outputOption);
        String modelPath = (String) commandLine.getValue(modelOption);
        boolean computeLikelihood = commandLine.hasOption(likelihoodOption);

        //reading serialized HMM
        DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
        HmmModel model;
        try {
            model = LossyHmmSerializer.deserialize(modelStream);
        } finally {
            Closeables.close(modelStream, true);
        }

        //reading observations
        List<Integer> observations = Lists.newArrayList();
        Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8");
        try {
            while (scanner.hasNextInt()) {
                observations.add(scanner.nextInt());
            }
        } finally {
            scanner.close();
        }

        int[] observationsArray = new int[observations.size()];
        for (int i = 0; i < observations.size(); ++i) {
            observationsArray[i] = observations.get(i);
        }

        //decoding
        int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true);

        //writing output
        PrintWriter writer = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
        try {
            for (int hiddenState : hiddenStates) {
                writer.print(hiddenState);
                writer.print(' ');
            }
        } finally {
            Closeables.close(writer, false);
        }

        if (computeLikelihood) {
            System.out.println("Likelihood: " + HmmEvaluator.modelLikelihood(model, observationsArray, true));
        }
    } catch (OptionException e) {
        CommandLineUtil.printHelp(optionGroup);
    }
}
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080// ww w .ja va 2 s . c o m log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option 
numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { 
log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelClassifierJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(), para.getModelFileName()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }