List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
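setJobName assigns the human-readable name shown in the JobTracker/ResourceManager UI; it must be called before the job is submitted, and throws IllegalStateException once the job is in the RUNNING state. Before the examples below, here is a minimal self-contained sketch of typical usage (the class name, paths, and job name are illustrative placeholders, not taken from any example on this page; with no mapper or reducer set, the job runs as an identity copy):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Must be called while the job is still in the DEFINE state;
        // after submission, setJobName throws IllegalStateException.
        job.setJobName("IdentityCopy");
        job.setJarByClass(SetJobNameExample.class);
        // Defaults: identity Mapper/Reducer over TextInputFormat records.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}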
From source file:hu.sztaki.ilab.bigdata.common.tools.hbase.PerformanceEvaluation.java
License:Apache License
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    job.waitForCompletion(true);
}
From source file:info.halo9pan.word2vec.hadoop.mr.WordSort.java
License:Apache License
public int run(String[] args) throws Exception {
    logger.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    SortInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("WordSort");
    job.setJarByClass(WordSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, SortInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + SortInputFormat.PARTITION_FILENAME);
        try {
            SortInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            logger.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    SortOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    logger.info("done");
    return ret;
}
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraGen.java
License:Apache License
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraValidate.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.aos.mapreduce.count.WordCountTool.java
License:Apache License
public int run(String[] args) throws Exception {
    // Exactly two arguments are required; the original range check
    // (args.length > 0 && args.length < 3) also accepted a single
    // argument and then failed on args[1].
    if (args.length != 2) {
        System.out.println("WordCount <inDir> <outDir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("WordCount_" + inPath.getName());
    job.setJar("./target/datalayer-hadoop-mapreduce-1.0.0-SNAPSHOT.jar");
    // job.setJarByClass(WordCountTool.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:io.aos.mapreduce.grep.GrepTool.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }
    try {
        Job grepJob = Job.getInstance(conf);
        grepJob.setJobName("GrepSearch");
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("GrepSort");
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // Write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } catch (Exception e) {
        return 2;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
From source file:io.aos.t4f.hadoop.mapreduce.WordCountMapReduceTest2.java
License:Apache License
public static void main(String... args) throws Exception {
    // Get the default configuration object
    Configuration conf = new Configuration();
    // Add resources
    conf.addResource("hdfs-default.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("mapred-default.xml");
    conf.addResource("mapred-site.xml");
    Job job = new Job(conf);
    job.setJobName("WordCount");
    List<String> other_args = parseArguments(args, job);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(MapClass.class);
    job.setCombinerClass(ReduceClass.class);
    job.setReducerClass(ReduceClass.class);
    // Set the input format class
    job.setInputFormatClass(TextInputFormat.class);
    // Set the output format class
    job.setOutputFormatClass(TextOutputFormat.class);
    // Set the input path
    TextInputFormat.setInputPaths(job, other_args.get(0));
    // Set the output path
    TextOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    /*
     * Set the minimum and maximum split sizes. This parameter helps to
     * specify the number of map tasks. For each input split, there will be
     * a separate map task. In this example each split is of size 32 MB.
     */
    TextInputFormat.setMinInputSplitSize(job, 32 * MEGABYTES);
    TextInputFormat.setMaxInputSplitSize(job, 32 * MEGABYTES);
    // Set the jar file to run
    job.setJarByClass(WordCountMapReduceTest2.class);
    // Submit the job
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    if (exitCode == 0) {
        Date endTime = new Date();
        System.out.println("Job ended: " + endTime);
        System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    } else {
        System.out.println("Job failed.");
    }
    System.exit(exitCode);
}
From source file:io.aos.t4f.hadoop.mapreduce.WordCountMapReduceTest3.java
License:Apache License
public void testMapReduce() throws Exception {
    String inputPath = "/docs/ChangesFancyStyle.css";
    String outputPath = "/out";
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", "hdfs://ppc006:54310");
    configuration.set("mapred.job.tracker", "ppc006:54311");
    // The two settings below override the cluster addresses above,
    // so this test actually runs with the local job runner.
    configuration.set("mapred.job.tracker", "local");
    configuration.set("fs.default.name", "local");
    DistributedCache.addArchiveToClassPath(new Path("/jar/t4f-nosql-hadoop-1.0-SNAPSHOT.jar"), configuration);
    Job job = new Job(configuration);
    // job.setJarByClass(Driver.class);
    job.setJobName("TestJob");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReduceClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
From source file:io.bfscan.clueweb12.BuildDictionary.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    // The original logged ComputeTermStatistics here, apparently a
    // copy-paste slip; this tool is BuildDictionary.
    LOG.info("Tool name: " + BuildDictionary.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();
    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "4096");
    conf.set("mapreduce.map.java.opts", "-Xmx4096m");
    conf.set("mapreduce.reduce.memory.mb", "4096");
    conf.set("mapreduce.reduce.java.opts", "-Xmx4096m");

    Job job = Job.getInstance(conf);
    job.setJobName(BuildDictionary.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildDictionary.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);

    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:io.bfscan.clueweb12.BuildPForDocVectors.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(REDUCERS_OPTION));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildPForDocVectors.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildPForDocVectors.class);

    LOG.info("Tool name: " + BuildPForDocVectors.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - preprocessing: " + preprocessing);

    if (cmdline.hasOption(REDUCERS_OPTION)) {
        int numReducers = Integer.parseInt(cmdline.getOptionValue(REDUCERS_OPTION));
        LOG.info(" - reducers: " + numReducers);
        job.setNumReduceTasks(numReducers);
    } else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.getConfiguration().set(DICTIONARY_OPTION, dictionary);
    job.getConfiguration().set(PREPROCESSING, preprocessing);

    job.setInputFormatClass(ClueWeb12InputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntArrayWritable.class);

    job.setMapperClass(MyMapper.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}