Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce Job method setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data. Throws IllegalStateException if the job has already been submitted.
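
Before the examples, here is a minimal, self-contained sketch of the call in context. The Example driver, ExampleMapper, ExampleReducer, and the argument paths are placeholders for illustration only, not taken from any example below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Example {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        job.setJarByClass(Example.class);
        job.setMapperClass(ExampleMapper.class);    // placeholder mapper
        job.setReducerClass(ExampleReducer.class);  // placeholder reducer

        // Declare the final (reducer) output types. This must happen before
        // the job is submitted; afterwards it throws IllegalStateException.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}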

Usage

From source file:com.jbw.recommendsystem.joinim.JoinMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path itemPath = new Path(conf.get("iteminput"));
    Path matrixPath = new Path(conf.get("matrixinput"));
    Path out = new Path(conf.get("output"));

    Job job = Job.getInstance(conf);
    job.setJobName("jjj");
    job.setJarByClass(JoinMRD.class);

    MultipleInputs.addInputPath(job, itemPath, TextInputFormat.class, IteamMapper.class);
    MultipleInputs.addInputPath(job, matrixPath, TextInputFormat.class, MatrixMapper.class);
    job.setMapOutputKeyClass(Text.class);

    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, out);

    job.setOutputKeyClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
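
A note on this example: setMapOutputKeyClass is only needed when the map output types must be declared separately. When setMapOutputKeyClass/setMapOutputValueClass are not called, Hadoop defaults the map output types to the job output types set via setOutputKeyClass/setOutputValueClass.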

From source file:com.jbw.recommendsystem.martrixlist.MartrixListMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job surJob = Job.getInstance(conf);
    surJob.setJarByClass(MartrixListMRD.class);
    surJob.setJobName("user");

    surJob.setMapperClass(MListMapper.class);
    surJob.setReducerClass(MListReducer.class);

    surJob.setMapOutputKeyClass(Text.class);
    surJob.setMapOutputValueClass(Text.class);

    surJob.setOutputKeyClass(Text.class);
    surJob.setOutputValueClass(Text.class);

    surJob.setInputFormatClass(TextInputFormat.class);
    surJob.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(surJob, in);
    TextOutputFormat.setOutputPath(surJob, out);

    return surJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.tar.sf.TarDriver.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job job = Job.getInstance(conf);
    job.setJarByClass(TarDriver.class);
    job.setJobName("test");

    job.setMapperClass(SmallFileMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, in);

    job.setOutputFormatClass(TarOutputFormat.class);
    TarOutputFormat.setOutputPath(job, out);

    // job.setNumReduceTasks(0);

    /*
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TarOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    */
    return job.waitForCompletion(true) ? 0 : 1;
}
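
TarOutputFormat appears to be a project-specific OutputFormat (the driver lives in the same com.jbw.tar.sf package tree); the commented-out block preserves an earlier configuration of the same job that declared LongWritable output keys.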

From source file:com.jeffy.mr.WordCount.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) {

    String input = "hdfs://master:8020/tmp/jeffy/input/wordcount.txt";
    String output = "hdfs://master:8020/tmp/jeffy/output";
    Configuration config = new Configuration();
    /**
     * Submitting a job from a Windows client fails without the
     * cross-platform settings below; see
     * http://stackoverflow.com/questions/24075669/mapreduce-job-fail-when-submitted-from-windows-machine
     */
    config.set("mapreduce.app-submission.cross-platform", "true");
    config.set("mapred.remote.os", "Linux");
    try {
        Job job = Job.getInstance(config);
        // Needed when submitting from a Windows client
        job.setJarByClass(WordCount.class);
        // Explicitly point at the packaged job jar
        job.setJar("D:\\bigdata\\mapreduce-demo\\src\\main\\java\\WordCount.jar");
        job.setJobName("Wordcount job");
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        TextInputFormat.setInputPaths(job, new Path(input));
        TextOutputFormat.setOutputPath(job, new Path(output));
        // Submit the job, then poll for progress until the job is complete
        try {
            job.waitForCompletion(true);
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

}
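
The mapreduce.app-submission.cross-platform and mapred.remote.os settings are what allow a job built on a Windows client to run on a Linux cluster; without them the generated container launch commands use the wrong platform conventions and the job fails, which is the problem discussed in the linked Stack Overflow question.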

From source file:com.jet.hadoop.wordcount.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    long timeBegin = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at" + timeBegin);

    if (args == null || args.length == 0) {
        args = new String[2];
        args[0] = "E:\\Work\\input\\hello.txt";
        args[1] = "E:\\Work\\output";
    }

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    //      job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    //      job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    boolean result = job.waitForCompletion(true);

    long timeEnd = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at" + timeEnd);
    System.out.println("hadoop wordcount cost time" + (timeEnd - timeBegin) / 1000 + " seconds.");

    System.exit(result ? 0 : 1);
}
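
GenericOptionsParser consumes the standard Hadoop command-line options (-D, -files, -libjars, and so on) and returns only the remaining application arguments, which is why the driver can treat every remaining argument except the last as an input path.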

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageJob.class.getSimpleName());
    job.setJarByClass(AverageJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Average Job");
    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);
    job.setReducerClass(AverageReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}
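
Two notes on this and the following jobs from the same project: the new Job(Configuration, String) constructor is deprecated in current Hadoop in favor of Job.getInstance(Configuration, String), and because System.exit never returns, the trailing return 0 is dead code; returning the waitForCompletion result directly, as the other drivers on this page do, is the cleaner Tool.run implementation. Also, since the job writes gzip-compressed SequenceFile output, the IntWritable classes declared with setOutputKeyClass and setOutputValueClass become the key and value types of the resulting sequence files.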

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think 
     * Hadoop doesn't have this format [check later], so using NullOutputFormat.class.
     * 
     * In general life gets a little more dangerous when you deviate from MapReduce's input/output 
     * framework and start working with your own files. Your tasks are no longer guaranteed to be idempotent 
     * and you'll need to understand how various failure scenarios can affect your tasks. For example, your files 
     * may only be partially written when some tasks are restarted. Our example here is safe(r) because all the file 
     * operations take place together only once in the close() method and in only one reducer. A more 
     * careful/paranoid implementation would check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}
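
With NullOutputFormat, the Text and BloomFilter classes declared here still type-check the reducer's output, but nothing is written through the normal output path; as the comment above explains, the reducer serializes the BloomFilter to a file itself.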

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jumptap.h2redis.RedisDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        usage();
        return 1;
    }

    Map<String, String> argMap = new HashMap<String, String>();
    String[] kv;

    for (String arg : args) {
        kv = arg.split("=");
        if (kv.length != 2) {
            usage();
            return 1;
        }
        argMap.put(kv[0].trim(), kv[1]);
    }

    Configuration conf = getConf();
    String[] hostPort = argMap.get(REDIS_CMD).split(":");
    conf.set(REDIS_HOST, hostPort[0].trim());
    conf.setInt(REDIS_PORT, Integer.valueOf(hostPort[1].trim()));
    conf.setInt(REDIS_KEY_FIELD, Integer.valueOf(argMap.get(KEY_CMD).trim()));
    conf.setInt(REDIS_HASHKEY_FIELD, Integer.valueOf(argMap.get(HASH_KEY_CMD).trim()));
    conf.setInt(REDIS_HASHVAL_FIELD, Integer.valueOf(argMap.get(HASH_VAL_CMD).trim()));

    if (argMap.containsKey(REDIS_DB_CMD)) {
        conf.set(REDIS_DB, argMap.get(REDIS_DB_CMD).trim());
    }
    if (argMap.containsKey(REDIS_PW_CMD)) {
        conf.set(REDIS_PW, argMap.get(REDIS_PW_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_CMD)) {
        conf.set(REDIS_KEY_PREFIX, argMap.get(KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(HASH_KEY_PFX_CMD)) {
        conf.set(REDIS_HASHKEY_PREFIX, argMap.get(HASH_KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_DELIM_CMD)) {
        conf.set(REDIS_KEY_PREFIX_DELIM, argMap.get(KEY_PFX_DELIM_CMD).trim());
    }
    if (argMap.containsKey(KEY_FILTER_CMD)) {
        conf.setPattern(REDIS_KEY_FILTER, Pattern.compile(argMap.get(KEY_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(HASH_FILTER_CMD)) {
        conf.setPattern(REDIS_HASH_FILTER, Pattern.compile(argMap.get(HASH_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(VAL_FILTER_CMD)) {
        conf.setPattern(REDIS_VAL_FILTER, Pattern.compile(argMap.get(VAL_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(TTL_CMD)) {
        conf.setInt(REDIS_KEY_TTL, Integer.valueOf(argMap.get(TTL_CMD).trim()));
    }
    if (argMap.containsKey(TS_KEY_CMD)) {
        conf.set(REDIS_KEY_TS, argMap.get(TS_KEY_CMD).trim());
    } else {
        conf.set(REDIS_KEY_TS, "redis.lastupdate");
    }

    Job job = new Job(conf, "RedisDriver");
    FileInputFormat.addInputPath(job, new Path(argMap.get(INPUT_CMD)));
    job.setJarByClass(RedisDriver.class);
    job.setMapperClass(RedisOutputMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(RedisOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
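
Because setNumReduceTasks(0) makes this a map-only job, the mapper's output goes straight to RedisOutputFormat, so the Text/Text types declared with setOutputKeyClass and setOutputValueClass describe the map output rather than reduce output.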