List of usage examples for org.apache.commons.cli2.commandline.Parser#parse
public CommandLine parse(final String[] arguments) throws OptionException
From source file:it.jnrpe.server.JNRPEServer.java
/** * Parses the command line./*from ww w . j a va2 s.c o m*/ * * @param vsArgs * The command line * @return The parsed command line */ private static CommandLine parseCommandLine(final String[] vsArgs) { try { Group opts = configureCommandLine(); // configure a HelpFormatter HelpFormatter hf = new HelpFormatter(); // configure a parser Parser p = new Parser(); p.setGroup(opts); p.setHelpFormatter(hf); // p.setHelpTrigger("--help"); return p.parse(vsArgs); } catch (OptionException oe) { printUsage(oe); } catch (Exception e) { e.printStackTrace(); // Should never happen... } return null; }
From source file:com.ibm.jaql.util.shell.JaqlShellArguments.java
@SuppressWarnings("unchecked") static JaqlShellArguments parseArgs(String... args) { // option builders final DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); final ArgumentBuilder abuilder = new ArgumentBuilder(); final GroupBuilder gbuilder = new GroupBuilder(); // create standard options Option optHelp = obuilder.withShortName("h").withShortName("?").withLongName("help") .withDescription("print this message").create(); Option optJars = obuilder.withShortName("j").withLongName("jars") .withDescription(//from w w w. j a v a 2s.co m "comma-separated list of jar files to include user defined expressions or data stores") .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create(); Option optSearchPath = obuilder.withShortName("jp").withLongName("jaql-path") .withDescription("colon seperated list of all search path entries") .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create(); Option optBatch = obuilder.withShortName("b").withLongName("batch") .withDescription("run in batch mode (i.e., do not read from stdin)").create(); Option optOutOptions = obuilder.withShortName("o").withLongName("outoptions") .withDescription("output options: json, del and xml or an output IO descriptor. 
" + "This option is ignored when not running in batch mode.") .withArgument(abuilder.withName("outoptions").withMinimum(1).withMaximum(1).create()).create(); Option optEval = obuilder.withShortName("e").withLongName("eval") .withDescription("evaluate Jaql expression") .withArgument(abuilder.withName("expr").withMinimum(1).withMaximum(1).create()).create(); // create mini-cluster options Option optCluster = obuilder.withShortName("c").withLongName("cluster") .withDescription("use existing cluster (i.e., do not launch a mini-cluster)").create(); Option optNumNodes = obuilder.withShortName("n").withLongName("no-nodes") .withDescription("mini-cluster option: number of nodes to spawn") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1) .withValidator(NumberValidator.getIntegerInstance()).create()) .create(); Option optDir = obuilder.withShortName("d").withLongName("hdfs-dir") .withDescription("mini-cluster option: root HDFs directory") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create(); Group clusterOptions = gbuilder.withName("Cluster options").withOption(optCluster).withOption(optDir) .withOption(optNumNodes).create(); // create input files option Option optInputFiles = abuilder.withName("file").withDescription("list of input files").withMinimum(0) .create(); Option optLog = obuilder.withShortName("l").withLongName("log") .withDescription("log options: json, del and xml or an output IO descriptor. 
") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create(); // combine all options Group options = gbuilder.withName("options").withOption(optHelp).withOption(optJars) .withOption(optSearchPath).withOption(optBatch).withOption(optLog).withOption(optOutOptions) .withOption(optEval).withOption(optInputFiles).withOption(clusterOptions).create(); // parse and print help if necessary CommandLine cl; try { Parser parser = new Parser(); parser.setGroup(options); cl = parser.parse(args); } catch (OptionException e) { printHelpAndExit(e, null, options); return null; } if (cl.hasOption(optHelp)) { printHelpAndExit(null, options); } // validate arguments JaqlShellArguments result = new JaqlShellArguments(); // mini-cluster options if (cl.hasOption(optCluster)) { result.useExistingCluster = true; } if (cl.hasOption(optDir)) { if (result.useExistingCluster) { printHelpAndExit("Options " + optCluster.getPreferredName() + " and " + optDir.getPreferredName() + " are mutually exclusive", options); } result.hdfsDir = (String) cl.getValue(optDir); } if (cl.hasOption(optNumNodes)) { if (result.useExistingCluster) { printHelpAndExit("Options " + optCluster.getPreferredName() + " and " + optNumNodes.getPreferredName() + " are mutually exclusive", options); } result.numNodes = ((Number) cl.getValue(optNumNodes)).intValue(); } // jar files if (cl.hasOption(optJars)) { result.jars = ((String) cl.getValue(optJars)).split(","); for (String jar : result.jars) { if (!new File(jar).exists()) { printHelpAndExit("Jar file " + jar + " not found", options); } } } // search path directories if (cl.hasOption(optSearchPath)) { result.searchPath = ((String) cl.getValue(optSearchPath)).split(":"); for (String dir : result.searchPath) { if (!new File(dir).exists() || !new File(dir).isDirectory()) { printHelpAndExit("Search-path entry " + dir + " not found or is no directory", options); } } } if (cl.hasOption(optBatch)) { result.batchMode = true; if 
(cl.hasOption(optOutOptions)) { String format = (String) cl.getValue(optOutOptions); try { result.outputAdapter = getOutputAdapter(format); } catch (Exception e) { printHelpAndExit(e, "\"" + format + "\" is neither a valid output format nor a valid IO descriptor", options); } } } // input if (cl.hasOption(optEval)) { String eval = (String) cl.getValue(optEval); if (!eval.endsWith(";")) eval += ";"; result.addInputStream(new ByteArrayInputStream(eval.getBytes())); } if (cl.hasOption(optInputFiles)) { List<String> files = (List<String>) cl.getValues(optInputFiles); for (String file : files) { try { result.addInputStream(new FileInputStream(file)); } catch (FileNotFoundException e) { printHelpAndExit(e, "Input file " + file + " not found", options); } } } // error log if (cl.hasOption(optLog)) { String path = (String) cl.getValue(optLog); try { BufferedJsonRecord logFD = new BufferedJsonRecord(); logFD.add(Adapter.TYPE_NAME, new JsonString("local")); logFD.add(Adapter.LOCATION_NAME, new JsonString(path)); OutputAdapter oa = (OutputAdapter) JaqlUtil.getAdapterStore().output.getAdapter(logFD); result.logAdapter = oa; } catch (Exception e) { printHelpAndExit(e, "\"" + path + "\" invalid", options); } } if (!result.batchMode) { result.addStdin(); } return result; }
From source file:com.martinkampjensen.thesis.Main.java
private static CommandLine parse(String[] args, Group group) { final Parser parser = new Parser(); parser.setGroup(group);//from w w w . j ava2s . c o m CommandLine cmdLine = null; try { cmdLine = parser.parse(args); } catch (OptionException e) { System.out.println(e.getMessage()); exit(StatusCode.ARGUMENT); } return cmdLine; }
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
/**
 * Command-line entry point: parses the options, loads the dictionary and the
 * corpus, trains an {@code InMemoryCollapsedVariationalBayes0} model and
 * writes the topic and document outputs.
 *
 * @param args the raw command line arguments
 * @param conf the configuration used for loading and writing; its
 *             {@code fs.default.name} is set from {@code --dfs} when unset
 * @return {@code -1} when help was requested, {@code 0} otherwise (including
 *         after an option parsing error, which is logged)
 * @throws Exception if loading, training or writing fails
 */
public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault(10).create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault(0.0).create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault(5).create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder.withLongName("numTrainThreads").withRequired(false)
            .withArgument(abuilder.withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    // The help option must be part of the group, otherwise the parser rejects it as an
    // unexpected token and the hasOption(helpOpt) branch below can never be taken.
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;

        // Values arrive as Strings when given on the command line, but as the declared default
        // object (Integer/Double/String) otherwise. Going through toString() handles both and
        // avoids the ClassCastException of a fixed cast.
        int numTopics = Integer.parseInt(cmdLine.getValue(numTopicsOpt).toString());
        double alpha = Double.parseDouble(cmdLine.getValue(alphaOpt).toString());
        double eta = Double.parseDouble(cmdLine.getValue(etaOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        int burnInIterations = Integer.parseInt(cmdLine.getValue(burnInOpt).toString());
        double minFractionalErrorChange = Double.parseDouble(cmdLine.getValue(convergenceOpt).toString());
        int numTrainThreads = Integer.parseInt(cmdLine.getValue(numTrainThreadsOpt).toString());
        int numUpdateThreads = Integer.parseInt(cmdLine.getValue(numUpdateThreadsOpt).toString());
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean(cmdLine.getValue(verboseOpt).toString());
        double modelCorpusFraction = Double.parseDouble(cmdLine.getValue(modelCorpusFractionOption).toString());

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            // --dfs is optional; only override the configuration when a value was supplied
            if (dfsNameNode != null) {
                conf.set("fs.default.name", dfsNameNode);
            }
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);

        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);

        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, false);
        }

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        // NOTE(review): falls through to "return 0" on a usage error -- confirm callers expect that
    }
    return 0;
}
From source file:it.jnrpe.plugins.TestCommandLineParsing.java
/**
 * Builds an option group from the options declared in
 * {@code check_mysql_plugin.xml} and checks that {@code --check-slave},
 * passed without a value, is reported as present after parsing.
 *
 * @throws Exception on any failure while loading the definition or parsing
 */
@Test
public void testNoArgumentsOption() throws Exception {
    final ClassLoader loader = TestCommandLineParsing.class.getClassLoader();
    final PluginDefinition definition = PluginRepositoryUtil.parseXmlPluginDefinition(loader,
            loader.getResourceAsStream("check_mysql_plugin.xml"));

    // One CLI2 option per option declared by the plugin
    GroupBuilder groupBuilder = new GroupBuilder();
    for (PluginOption pluginOption : definition.getOptions()) {
        groupBuilder = groupBuilder.withOption(pluginOption.toOption());
    }

    final Parser parser = new Parser();
    parser.setGroup(groupBuilder.create());

    final String[] commandLine = { "--hostname", "$ARG1$", "--port", "$ARG2$", "--database", "$ARG3$",
            "--user", "$ARG4$", "--password", "$ARG5$", "--check-slave" };
    final CommandLine parsed = parser.parse(commandLine);

    Assert.assertTrue(parsed.hasOption("--check-slave"));
}
From source file:it.jnrpe.server.console.PluginCommand.java
public boolean execute(final String[] args) throws Exception { Parser p = new Parser(); p.setGroup(getCommandLineGroup());/* w ww .ja v a2s . c om*/ try { p.parse(args); } catch (Exception e) { getConsole().println(); // getConsole().println("\u001B[1mERROR:\u001B[0m " + // e.getMessage()); getConsole().println(highlight("ERROR: ") + e.getMessage()); getConsole().println(); printHelp(); return false; } PluginProxy plugin = (PluginProxy) pluginRepository.getPlugin(pluginName); InjectionUtils.inject(plugin, context); //plugin.setContext(context); ReturnValue retVal = plugin.execute(args); getConsole().println(retVal.getMessage()); return false; }
From source file:com.tamingtext.tagrecommender.CountStackOverflowTags.java
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true) .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("o").create(); Option limitOpt = obuilder.withLongName("limit").withRequired(false) .withArgument(abuilder.withName("limit").withMinimum(1).withMaximum(1).create()) .withDescription("Emit this many of the most frequent tags").withShortName("l").create(); Option cutoffOpt = obuilder.withLongName("cutoff").withRequired(false) .withArgument(abuilder.withName("cutoff").withMinimum(1).withMaximum(1).create()) .withDescription("Drop tags with a count less than this number").withShortName("c").create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(outputFileOpt) .withOption(limitOpt).withOption(cutoffOpt).create(); try {// w w w . 
ja va 2 s.c o m Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); countFile = new File((String) cmdLine.getValue(outputFileOpt)); if (cmdLine.hasOption(limitOpt)) { limit = Integer.parseInt((String) cmdLine.getValue(limitOpt)); } if (cmdLine.hasOption(cutoffOpt)) { cutoff = Integer.parseInt((String) cmdLine.getValue(cutoffOpt)); } } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } validate(); return true; }
From source file:com.tamingtext.tagrecommender.TestStackOverflowTagger.java
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option countFileOpt = obuilder.withLongName("countFile").withRequired(true) .withArgument(abuilder.withName("countFile").withMinimum(1).withMaximum(1).create()) .withDescription("The tag count file").withShortName("c").create(); Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true) .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("c").create(); Option solrUrlOpt = obuilder.withLongName("solrUrl").withRequired(true) .withArgument(abuilder.withName("solrUrl").withMinimum(1).withMaximum(1).create()) .withDescription("URL of the solr server").withShortName("s").create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(countFileOpt) .withOption(outputFileOpt).withOption(solrUrlOpt).create(); try {//from ww w .j av a2 s . 
c o m Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); countFile = new File((String) cmdLine.getValue(countFileOpt)); outputFile = new File((String) cmdLine.getValue(outputFileOpt)); solrUrl = (String) cmdLine.getValue(solrUrlOpt); client = new TagRecommenderClient(solrUrl); } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } catch (MalformedURLException e) { log.error("MalformedURLException", e); return false; } validate(); return true; }
From source file:com.tamingtext.tagrecommender.ExtractStackOverflowData.java
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option trainingOutputOpt = obuilder.withLongName("trainingOutputFile").withRequired(true) .withArgument(abuilder.withName("trainingOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The training data output file").withShortName("tr").create(); Option testOutputOpt = obuilder.withLongName("testOutputFile").withRequired(true) .withArgument(abuilder.withName("testOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The test data output file").withShortName("te").create(); Option trainingDataSizeOpt = obuilder.withLongName("trainingDataSize").withRequired(false) .withArgument(abuilder.withName("trainingDataSize").withMinimum(1).withMaximum(1).create()) .withDescription("The number of questions to extract for training data").withShortName("trs") .create();//from w w w . ja va2s. 
com Option testDataSizeOpt = obuilder.withLongName("testDataSize").withRequired(false) .withArgument(abuilder.withName("testDataSize").withMinimum(1).withMaximum(1).create()) .withDescription("The number of questions to extract for training data").withShortName("tes") .create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(trainingOutputOpt) .withOption(testOutputOpt).withOption(trainingDataSizeOpt).withOption(testDataSizeOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); trainingOutputFile = new File((String) cmdLine.getValue(trainingOutputOpt)); testOutputFile = new File((String) cmdLine.getValue(testOutputOpt)); if (cmdLine.hasOption(trainingDataSizeOpt)) { trainingDataSize = Integer.parseInt((String) cmdLine.getValue(trainingDataSizeOpt)); } if (cmdLine.hasOption(testDataSizeOpt)) { testDataSize = Integer.parseInt((String) cmdLine.getValue(testDataSizeOpt)); } } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } validate(); return true; }
From source file:com.digitalpebble.behemoth.tika.TikaDriver.java
public int run(String[] args) throws Exception { final FileSystem fs = FileSystem.get(getConf()); GroupBuilder gBuilder = new GroupBuilder().withName("Options:"); List<Option> options = new ArrayList<Option>(); Option inputOpt = buildOption("input", "i", "The input path", true, true, null); options.add(inputOpt);/*from www . ja va 2 s. c o m*/ Option outOpt = buildOption("output", "o", "The output path", true, true, null); options.add(outOpt); Option tikaOpt = buildOption("tikaProcessor", "t", "The fully qualified name of a TikaProcessor class that handles the extraction (optional)", true, false, null); options.add(tikaOpt); Option mimeTypeOpt = buildOption("mimeType", "m", "The mime type to use (optional)", true, false, ""); options.add(mimeTypeOpt); for (Option opt : options) { gBuilder = gBuilder.withOption(opt); } Group group = gBuilder.create(); try { Parser parser = new Parser(); parser.setGroup(group); // TODO catch exceptions with parsing of opts CommandLine cmdLine = parser.parse(args); Path inputPath = new Path(cmdLine.getValue(inputOpt).toString()); Path outputPath = new Path(cmdLine.getValue(outOpt).toString()); String handlerName = null; if (cmdLine.hasOption(tikaOpt)) { handlerName = cmdLine.getValue(tikaOpt).toString(); } JobConf job = new JobConf(getConf()); job.setJarByClass(this.getClass()); if (cmdLine.hasOption(mimeTypeOpt)) { String mimeType = cmdLine.getValue(mimeTypeOpt).toString(); job.set(TikaConstants.TIKA_MIME_TYPE_KEY, mimeType); } if (handlerName != null && handlerName.equals("") == false) { job.set(TIKA_PROCESSOR_KEY, handlerName); } job.setJobName("Tika : " + inputPath.toString()); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(BehemothDocument.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BehemothDocument.class); job.setMapperClass(TikaMapper.class); boolean isFilterRequired = 
BehemothReducer.isRequired(job); if (isFilterRequired) job.setReducerClass(BehemothReducer.class); else { job.setNumReduceTasks(0); } FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); try { long start = System.currentTimeMillis(); JobClient.runJob(job); long finish = System.currentTimeMillis(); if (log.isInfoEnabled()) { log.info("TikaDriver completed. Timing: " + (finish - start) + " ms"); } } catch (Exception e) { log.error("Exception", e); return -1; // don't delete the output as some of it could be used // fs.delete(outputPath, true); } finally { } } catch (OptionException e) { log.error("OptionException", e.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.setGroup(group); formatter.print(); return -1; } return 0; }