Example usage for org.apache.commons.cli OptionBuilder withArgName

List of usage examples for org.apache.commons.cli OptionBuilder withArgName

Introduction

In this page you can find the example usage for org.apache.commons.cli OptionBuilder withArgName.

Prototype

public static OptionBuilder withArgName(String name) 

Source Link

Document

The next Option created will have the specified argument value name.

Usage

From source file:backtype.storm.GenericOptionsParser.java

static Options buildGeneralOptions(Options opts) {
    Options r = new Options();

    for (Object o : opts.getOptions())
        r.addOption((Option) o);/*from ww  w. ja v  a  2 s. c  om*/

    Option libjars = OptionBuilder.withArgName("paths").hasArg()
            .withDescription("comma separated jars to be used by the submitted topology").create("libjars");
    r.addOption(libjars);
    optionProcessors.put("libjars", new LibjarsProcessor());

    Option conf = OptionBuilder.withArgName("configuration file").hasArg()
            .withDescription("an application configuration file").create("conf");
    r.addOption(conf);
    optionProcessors.put("conf", new ConfFileProcessor());

    // Must come after `conf': this option is of higher priority
    Option extraConfig = OptionBuilder.withArgName("D").hasArg()
            .withDescription("extra configurations (preserving types)").create("D");
    r.addOption(extraConfig);
    optionProcessors.put("D", new ExtraConfigProcessor());

    return r;
}

From source file:com.boundary.plugin.sdk.jmx.ExportMBeans.java

@SuppressWarnings("static-access")
private void buildOptions() {
    helpOption = OptionBuilder.withDescription("Display help information").withLongOpt("help").create("?");
    hostOption = OptionBuilder.withArgName("host").hasArgs(1).isRequired().withDescription("JMX host")
            .withLongOpt("host").create("h");
    portOption = OptionBuilder.withArgName("port").hasArgs(1).isRequired().withDescription("JMX port")
            .withLongOpt("port").create("p");
    exportOption = OptionBuilder.withArgName("type").hasArgs(1).withDescription(
            "Selects what to export which is either: mbeans, metrics, or plugins." + " defaults to mbeans")
            .withLongOpt("export").create("e");
    mergeFileOption = OptionBuilder.withArgName("metric-definitions.json").hasArgs(1)
            .withDescription("Metrics definition file to merge into exported metric definitions."
                    + "Used to override previously specified values. Uses the metric identifer"
                    + " as the key.")
            .withLongOpt("merge-file").create("m");
    outputFileOption = OptionBuilder.withArgName("path").hasArgs(1)
            .withDescription("Path to output the exported json file").create("f");
    prefixOption = OptionBuilder.withArgName("prefix").hasArgs(1)
            .withDescription("Prefix to add to metric display names. Defaults to empty").create("x");

    options.addOption(helpOption);/*from   ww  w  . ja v a 2 s.  c  om*/
    options.addOption(hostOption);
    options.addOption(portOption);
    options.addOption(exportOption);
    options.addOption(mergeFileOption);
    options.addOption(outputFileOption);
    options.addOption(prefixOption);

}

From source file:BuildPageRankRecords.java

/**
 * Runs this tool./*w w w. java  2  s .  c  o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));

    LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:com.archivas.clienttools.arcmover.cli.ArcDelete.java

@SuppressWarnings({ "static-access", "AccessStaticViaInstance" })
public Options getOptions() {
    if (cliOptions == null) {
        Options options = new Options();

        // *** Adding a new option needs to be added to the cliOrder list
        // Note, you cannot do required options with this library and help so we have to add all
        // as non-required
        // and deal with it when parsing
        options.addOption(OptionBuilder.withDescription("Displays this help text (the default behavior).")
                .withLongOpt(HELP_OPTION).create("h"));

        // Required
        options.addOption(OptionBuilder.withArgName("profile_name").hasArg().withDescription(
                "Target location for the delete operation: either a namespace profile name or LFS for the local file system.")
                .withLongOpt(PROFILE_OPTION).create("p"));
        options.addOption(OptionBuilder.withArgName("path").hasArg()
                .withDescription("Directory in which to perform the delete operation.").withLongOpt(PATH_OPTION)
                .create());//from ww  w .  j a v a2  s  . co  m
        options.addOption(OptionBuilder.withArgName("operation_type").hasArg().withDescription(
                "Type of operation to perform: delete, purge, privileged-delete, or privileged-purge.  If omitted, defaults to delete.  Only supported for HCP namespaces.")
                .withLongOpt(OPERATION_OPTION).create());
        options.addOption(OptionBuilder.withArgName("string").hasArg().withDescription(
                "Specifies the reason for a privileged operation.  The string must be from one through 1024 characters long.  Required and only supported for a privileged operation.")
                .withLongOpt(REASON_OPTION).create()); // Required

        options.addOption(OptionBuilder.withArgName("job_name").hasOptionalArg().withDescription(
                "Reruns the delete job with the given job name if provide, if no name is provided it reruns the last delete job run.  When rerunning you can change the load and export settings.  Any changes to the profile, path, or operation-type will not change what is set in the job.")
                .withLongOpt(RERUN).create());
        options.addOption(OptionBuilder.withArgName("job_name").hasOptionalArg().withDescription(
                "Resumes the delete job from where it left off, if no name is provided it resumes the last delete job run.  When rerunning you can change the load and export settings.  Any changes to the profile, path, or operation-type will not change what is set in the job.")
                .withLongOpt(RESUME).create());

        // Optional
        options.addOption(getInsecureSSLOption());

        // results_types is only not shared because CONFLICT is not a valid option for delete
        // jobs
        options.addOption(OptionBuilder.withArgName("results_types").hasArg().withDescription(
                "Types of results lists to export: either ALL or a comma-separated list that includes one or more of SUCCESS, FAILURE, and JOBLIST.  If omitted no results lists are exported.")
                .withLongOpt(EXPORT_RESULTS_TYPE).create());

        getSharedOptions(options);
        cliOptions = options;
    }
    return cliOptions;
}

From source file:edu.umd.cloud9.collection.wikipedia.RepackWikipedia.java

@SuppressWarnings("static-access")
@Override// w  w w . j a  v  a2s.  c  o m
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location")
            .create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file")
            .create(MAPPING_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("block|record|none").hasArg()
            .withDescription("compression type").create(COMPRESSION_TYPE_OPTION));
    options.addOption(OptionBuilder.withArgName("wiki_language").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION);

    if (!"block".equals(compressionType) && !"record".equals(compressionType)
            && !"none".equals(compressionType)) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    // this is the default block size
    int blocksize = 1000000;

    JobConf conf = new JobConf(getConf(), RepackWikipedia.class);
    conf.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

    conf.set(DOCNO_MAPPING_FIELD, mappingFile);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - XML dump file: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - compression type: " + compressionType);
    LOG.info(" - language: " + language);

    if ("block".equals(compressionType)) {
        LOG.info(" - block size: " + blocksize);
    }

    int mapTasks = 10;

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);

    SequenceFileInputFormat.addInputPath(conf, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    if ("none".equals(compressionType)) {
        SequenceFileOutputFormat.setCompressOutput(conf, false);
    } else {
        SequenceFileOutputFormat.setCompressOutput(conf, true);

        if ("record".equals(compressionType)) {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
            conf.setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WikipediaPage.class);

    conf.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java

/**
 * Runs this tool.//from   ww w . j a v  a2s  . c  om
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:com.yahoo.flowetl.commons.runner.BaseRunner.java

/**
 * Gets the command line options that this class will be using.
 * //from w w  w . j a va  2s.c  om
 * @return the options
 */
@SuppressWarnings("static-access")
protected Options getOptions() {
    Options cliOpts = new Options();
    Option cfgFile = OptionBuilder.withArgName("file").hasArg(true).withDescription("the configuration file")
            .create(CONFIG_OPT);
    cliOpts.addOption(cfgFile);
    Option helpOpt = OptionBuilder.hasArg(false).withDescription("help message").create(HELP_OPT);
    cliOpts.addOption(helpOpt);
    return cliOpts;
}

From source file:WikipediaForwardIndexBuilder.java

@SuppressWarnings("static-access")
@Override//  w w w .  j a  v  a 2s  .c o  m
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - language: " + language);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;

        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }

    reader.close();
    out.close();

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    // Clean up.
    fs.delete(new Path(tmpPath), true);

    return 0;
}

From source file:RepackWikipedia.java

@SuppressWarnings("static-access")
@Override//from   www .j  a  va  2s.com
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location")
            .create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file")
            .create(MAPPING_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("block|record|none").hasArg()
            .withDescription("compression type").create(COMPRESSION_TYPE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de").hasArg().withDescription("two-letter language code")
            .create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION);

    if (!"block".equals(compressionType) && !"record".equals(compressionType)
            && !"none".equals(compressionType)) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    // this is the default block size
    int blocksize = 1000000;

    //Job job = Job.getInstance(getConf());
    JobConf conf = new JobConf(RepackWikipedia.class);
    conf.setJarByClass(RepackWikipedia.class);
    conf.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

    conf.set(DOCNO_MAPPING_FIELD, mappingFile);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - XML dump file: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - compression type: " + compressionType);
    LOG.info(" - language: " + language);

    if ("block".equals(compressionType)) {
        LOG.info(" - block size: " + blocksize);
    }

    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    if ("none".equals(compressionType)) {
        FileOutputFormat.setCompressOutput(conf, false);
    } else {
        FileOutputFormat.setCompressOutput(conf, true);

        if ("record".equals(compressionType)) {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
            conf.setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WikipediaPage.class);

    conf.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    //job.waitForCompletion(true);
    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.gorden2.BuildInvertedIndex2.java

/**
 * Runs this tool.//  w ww .  j  a  v  a2  s  .  c o m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BuildInvertedIndex2.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndex2.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndex2.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfWritables.class);
    //job.setOutputFormatClass(MapFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}