Example usage for org.apache.hadoop.mapreduce Job setMapperClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Source Link

Document

Set the Mapper for the job.

Usage

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;/*from   w  w w  . j  ava 2  s  .  c  om*/
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);/*from   w w w  .jav  a 2s.  com*/

    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think 
     * Hadoop doesn't have this format [check later], so using NullOutpuFormat.class.
     * 
     * In general life gets a little more dangerous when you deviate from MapReduce's input/output 
     * framework and start working with your own files. Your tasks are no longer guaranteed to be idempotent 
     * and you'll need to understand how various failure scenarios can affect your tasks. For example, your files 
     * may only be partially written when some tasks are restarted. Our example here is safe(r) because all the file 
     * operations take place together only once in the close() method and in only one reducer. A more 
     * careful/paranoid implementation would check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;//from www  .  ja  v  a 2s  . com
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.DistributedCacheJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, DistributedCacheJob.class.getSimpleName());
    job.setJarByClass(DistributedCacheJob.class);

    /*/*from ww w .  j av a2  s. c  om*/
     * The following will disseminate the file to all the nodes and the file defaults to HDFS.
     * The second and third arguments denote the input and output paths of the standard Hadoop 
     * job. Note that we've limited the number of data sources to two. This is not an inherent 
     * limitation of the technique, but a simplification that makes our code easier to follow.
     */
    //job.addCacheFile(new Path(args[0]).toUri());

    Path in = new Path(args[1]);
    Path out = new Path(args[2]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample DistributedCache Job");
    job.setMapperClass(DistributedCacheMapper.class);

    /*
     * Took out the Reduce class as the plan is performing the joining in the map phase and will 
     * configure the job to have no reduce.
     */
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jumptap.h2redis.RedisDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        usage();/*from w  ww  .j a v  a2  s  .c o  m*/
        return 1;
    }

    Map<String, String> argMap = new HashMap<String, String>();
    String[] kv;

    for (String arg : args) {
        kv = arg.split("=");
        if (kv.length != 2) {
            usage();
            return 1;
        }
        argMap.put(kv[0].trim(), kv[1]);
    }

    Configuration conf = getConf();
    String[] hostPort = argMap.get(REDIS_CMD).split(":");
    conf.set(REDIS_HOST, hostPort[0].trim());
    conf.setInt(REDIS_PORT, Integer.valueOf(hostPort[1].trim()));
    conf.setInt(REDIS_KEY_FIELD, Integer.valueOf(argMap.get(KEY_CMD).trim()));
    conf.setInt(REDIS_HASHKEY_FIELD, Integer.valueOf(argMap.get(HASH_KEY_CMD).trim()));
    conf.setInt(REDIS_HASHVAL_FIELD, Integer.valueOf(argMap.get(HASH_VAL_CMD).trim()));

    if (argMap.containsKey(REDIS_DB_CMD)) {
        conf.set(REDIS_DB, argMap.get(REDIS_DB_CMD).trim());
    }
    if (argMap.containsKey(REDIS_PW_CMD)) {
        conf.set(REDIS_PW, argMap.get(REDIS_PW_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_CMD)) {
        conf.set(REDIS_KEY_PREFIX, argMap.get(KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(HASH_KEY_PFX_CMD)) {
        conf.set(REDIS_HASHKEY_PREFIX, argMap.get(HASH_KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_DELIM_CMD)) {
        conf.set(REDIS_KEY_PREFIX_DELIM, argMap.get(KEY_PFX_DELIM_CMD).trim());
    }
    if (argMap.containsKey(KEY_FILTER_CMD)) {
        conf.setPattern(REDIS_KEY_FILTER, Pattern.compile(argMap.get(KEY_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(HASH_FILTER_CMD)) {
        conf.setPattern(REDIS_HASH_FILTER, Pattern.compile(argMap.get(HASH_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(VAL_FILTER_CMD)) {
        conf.setPattern(REDIS_VAL_FILTER, Pattern.compile(argMap.get(VAL_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(VAL_FILTER_CMD)) {
        conf.setPattern(REDIS_VAL_FILTER, Pattern.compile(argMap.get(VAL_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(TTL_CMD)) {
        conf.setInt(REDIS_KEY_TTL, Integer.valueOf(argMap.get(TTL_CMD).trim()));
    }
    if (argMap.containsKey(TS_KEY_CMD)) {
        conf.set(REDIS_KEY_TS, argMap.get(TS_KEY_CMD).trim());
    } else {
        conf.set(REDIS_KEY_TS, "redis.lastupdate");
    }

    Job job = new Job(conf, "RedisDriver");
    FileInputFormat.addInputPath(job, new Path(argMap.get(INPUT_CMD)));
    job.setJarByClass(RedisDriver.class);
    job.setMapperClass(RedisOutputMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(RedisOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.juniarto.secondsorter.SsJob.java

public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);/*from  www. ja va 2 s  .  com*/

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.submit();

    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);
    return 0;

}

From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    logger.info("Logger - Converting Kissmetrics Json to Valid Json files");
    System.out.println("Converting Kissmetrics Json to Valid Json files");
    System.out.println("defaultCharacterEncoding by property: " + System.getProperty("file.encoding"));
    System.out.println("defaultCharacterEncoding by code: " + getDefaultCharEncoding());
    System.out.println("defaultCharacterEncoding by charSet: " + Charset.defaultCharset());

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToEnrichedJsonDriver.class);
    job.setJobName("Kissmetrics Json to valid and enriched Json files");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //Add number of reducers
    int numberOfReducers = 2;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 2;//from   ww  w.  j  a v  a 2s .  c  om
        }
    }

    job.setMapperClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(
            com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    int numberOfReducers = 1;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 1;/*  w  w w.  j  a va 2s .c om*/
        }
    }

    System.out.println("Kissmetrics Json Schema Extrator");

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToSchemaDriver.class);
    job.setJobName("Kissmetrics Json Schema Extrator");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceSequenceInputFormat.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceSequenceInputFormat <in> <out>");
        System.exit(2);/*  w  w w.  j  a v a 2  s .  com*/
    }
    Job job = new Job(conf, "TestMapreduceSequenceInputFormat");
    job.setJarByClass(TestMapreduceSequenceInputFormat.class);//?
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class); // SequenceFileInputFormat
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceTextInputFormat.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceTextInputFormat <in> <out>");
        System.exit(2);/*from  ww w. ja  v  a2 s  .  c o m*/
    }
    Job job = new Job(conf, "TestMapreduceTextInputFormat");
    job.setJarByClass(TestMapreduceTextInputFormat.class);//?
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}