Example usage for org.apache.hadoop.mapreduce Job setJobName

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.Job method setJobName, collected from open-source projects.

Prototype

public void setJobName(String name) throws IllegalStateException 

Document

Set the user-specified job name. The name can only be set while the job is still being defined; once the job has been submitted, setJobName throws an IllegalStateException.
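
Before the full examples, here is a minimal sketch of the typical call pattern. It is illustrative only: WordCountDriver and the "word-count" name are hypothetical placeholders, not taken from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // The name must be set while the job is still being defined;
        // once the job has been submitted, setJobName throws IllegalStateException.
        job.setJobName("word-count: " + args[0]);
        job.setJarByClass(WordCountDriver.class);
        // ... configure mapper, input/output formats, and paths here ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}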

Usage
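
All of the examples below follow the same pattern: the name passed to setJobName combines the driver's class name with its key arguments (input path, output path, filter expression, and so on), which makes each run easy to identify in the JobTracker or ResourceManager web UI.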

From source file: io.bfscan.clueweb12.BuildVByteDocVectors.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(REDUCERS_OPTION));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildVByteDocVectors.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildVByteDocVectors.class);

    LOG.info("Tool name: " + BuildVByteDocVectors.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - preprocessing: " + preprocessing);

    if (cmdline.hasOption(REDUCERS_OPTION)) {
        int numReducers = Integer.parseInt(cmdline.getOptionValue(REDUCERS_OPTION));
        LOG.info(" - reducers: " + numReducers);
        job.setNumReduceTasks(numReducers);
    } else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.getConfiguration().set(DICTIONARY_OPTION, dictionary);
    job.getConfiguration().set(PREPROCESSING, preprocessing);

    job.setInputFormatClass(ClueWeb12InputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(MyMapper.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: io.covert.binary.analysis.BinaryAnalysisJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        usage("");
    }

    String inDir = args[0];
    String outDir = args[1];

    Configuration conf = getConf();
    for (String name : requiredSettings) {
        if (conf.get(name) == null)
            usage("Missing required setting: " + name);
    }

    Job job = new Job(conf);
    job.setJobName(BinaryAnalysisJob.class.getName() + " inDir=" + inDir + ", outDir=" + outDir);
    job.setJarByClass(getClass());

    job.setMapperClass(BinaryAnalysisMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;
    return retVal;
}
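
Note that this example (and several that follow) constructs the job with new Job(conf). That constructor is deprecated in Hadoop 2.x and later; the factory method Job.getInstance(Configuration), used in the first example above, is the preferred replacement.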

From source file: io.covert.dns.collection.CollectionJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        usage("");
    }

    String dclass = args[0];
    String types = args[1];
    String inDir = args[2];
    String outDir = args[3];

    Configuration conf = getConf();

    if (conf.get("dns.collection.num.resolvers") == null)
        conf.setInt("dns.collection.num.resolvers", 50);
    if (conf.get("dns.collection.nameservers") == null)
        conf.set("dns.collection.nameservers", "127.0.0.1");

    Job job = new Job(conf);
    job.setJobName(CollectionJob.class.getSimpleName() + ": types=" + types + ", dclass=" + dclass + " inDir="
            + inDir + ", outDir=" + outDir + ", resolvers=" + conf.get("dns.collection.nameservers"));
    job.setJarByClass(getClass());

    job.setMapperClass(CollectionMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormatClass(DnsRequestInputFormat.class);
    DnsRequestInputFormat.setInputPaths(job, new Path(inDir));
    DnsRequestInputFormat.configure(job, dclass.toUpperCase(), Arrays.asList(types.split(",")),
            Arrays.asList(""));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;

    CounterGroup counters = job.getCounters().getGroup(CollectionMapper.RESOLVER_GROUP);
    Counter constructMessageMS = counters.findCounter(CollectionMapper.CONSTRUCT_MESSAGE_MS);
    Counter parseResponseMS = counters.findCounter(CollectionMapper.PARSE_RESPONSE_MS);
    Counter performRequestMS = counters.findCounter(CollectionMapper.PERFORM_REQUEST_MS);
    Counter totalRequestHandlingMS = counters.findCounter(CollectionMapper.TOTAL_REQUEST_HANDLING_MS);

    Log.info("Total ConstructMessage percent: "
            + (double) (constructMessageMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total ParseResponse percent:    "
            + (double) (parseResponseMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total PerformRequest percent:   "
            + (double) (performRequestMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));

    return retVal;
}

From source file: io.covert.dns.extract.ExtractorJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 3) {
        usage("");
    }

    String expression = args[0];
    String inDir = args[1];
    String outDir = args[2];

    Configuration conf = getConf();
    conf.set(ExtractorMapper.EXTRACTOR_JEXL_EXPRESSION, expression);

    Job job = new Job(conf);
    job.setJobName(ExtractorJob.class.getSimpleName() + ": inDir=" + inDir + ", outDir=" + outDir
            + ", expression=[" + expression + "]");
    job.setJarByClass(getClass());

    job.setMapperClass(ExtractorMapper.class);
    job.setReducerClass(UniqueKeyOnlyReducer.class);
    job.setNumReduceTasks(new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;
    return retVal;
}

From source file: io.covert.dns.filtering.FilterJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 3) {
        usage("");
    }

    String filter = args[0];
    String inDir = args[1];
    String outDir = args[2];

    Configuration conf = getConf();
    conf.set(FilterMapper.FILTER_JEXL_EXPRESSION, filter);

    Job job = new Job(conf);
    job.setJobName(FilterJob.class.getSimpleName() + ": inDir=" + inDir + ", outDir=" + outDir + ", filter=["
            + filter + "]");
    job.setJarByClass(getClass());

    job.setMapperClass(FilterMapper.class);
    job.setReducerClass(Reducer.class); // Identity Reduce...
    job.setNumReduceTasks(new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;
    return retVal;
}

From source file: io.covert.dns.geo.GeoJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        usage("");
    }

    String dbfile = args[0];
    String asnDbfile = args[1];
    String inDir = args[2];
    String outDir = args[3];

    Configuration conf = getConf();
    conf.set("maxmind.geo.database.file", dbfile);
    conf.set("maxmind.asn.database.file", asnDbfile);

    Job job = new Job(conf);
    job.setJobName(GeoJob.class.getSimpleName() + ": dbfile=" + dbfile + ", asnDB=" + asnDbfile + " inDir="
            + inDir + ", outDir=" + outDir);
    job.setJarByClass(getClass());

    job.setMapperClass(GeoMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;
    return retVal;
}

From source file: io.covert.dns.parse.ParseJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    String inDir = args[0];
    String outDir = args[1];

    Configuration conf = getConf();

    Job job = new Job(conf);
    job.setJobName(ParseJob.class.getSimpleName() + ": inDir=" + inDir + ", outDir=" + outDir);
    job.setJarByClass(getClass());

    job.setMapperClass(ParseMapper.class);
    job.setReducerClass(UniqueKeyOnlyReducer.class);
    job.setNumReduceTasks(new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: io.covert.dns.storage.StorageJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    String inDir = args[0];
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName(StorageJob.class.getSimpleName() + ": inDir=" + inDir);

    job.setMapperClass(StorageMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    // This job doesn't write output via Hadoop, it uses the configured storage modules
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.submit();

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: io.covert.util.FileFormatToConverterJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 1) {
        usage("");
    }

    String inDir = args[0];

    Configuration conf = getConf();

    if (conf.get("stream.process.command") == null) {
        conf.set("stream.process.command", "/opt/decompress.sh");
    }

    Job job = new Job(conf);
    job.setJobName(FileFormatToConverterJob.class.getName() + " inDir=" + inDir);
    job.setJarByClass(getClass());

    job.setMapperClass(ConvertMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(NullOutputFormat.class);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;
    return retVal;
}

From source file: io.fluo.stress.trie.Init.java

License: Apache License

private int unique(Path input, Path tmp) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Init.class);

    job.setJobName(Init.class.getName() + "_unique");

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, input);

    job.setReducerClass(UniqueReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, tmp);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}