Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce Job method setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data. Throws IllegalStateException if the job has already been submitted.
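
Before the examples, here is a minimal, self-contained sketch of the call in context. The Example driver, ExampleMapper, ExampleReducer, and the argument paths are placeholders for illustration only, not taken from any example below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Example {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        job.setJarByClass(Example.class);
        job.setMapperClass(ExampleMapper.class);    // placeholder mapper
        job.setReducerClass(ExampleReducer.class);  // placeholder reducer

        // Declare the final (reducer) output types. This must happen before
        // the job is submitted; afterwards it throws IllegalStateException.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}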

Usage

From source file:com.jbw.recommendsystem.joinim.JoinMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path itemPath = new Path(conf.get("iteminput"));
    Path matrixPath = new Path(conf.get("matrixinput"));
    Path out = new Path(conf.get("output"));

    Job job = Job.getInstance(conf);
    job.setJobName("jjj");
    job.setJarByClass(JoinMRD.class);

    MultipleInputs.addInputPath(job, itemPath, TextInputFormat.class, IteamMapper.class);
    MultipleInputs.addInputPath(job, matrixPath, TextInputFormat.class, MatrixMapper.class);
    job.setMapOutputKeyClass(Text.class);

    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, out);

    job.setOutputKeyClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
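
A note on this example: setMapOutputKeyClass is only needed when the map output types must be declared separately. When setMapOutputKeyClass/setMapOutputValueClass are not called, Hadoop defaults the map output types to the job output types set via setOutputKeyClass/setOutputValueClass.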

From source file:com.jbw.recommendsystem.martrixlist.MartrixListMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job surJob = Job.getInstance(conf);
    surJob.setJarByClass(MartrixListMRD.class);
    surJob.setJobName("user");

    surJob.setMapperClass(MListMapper.class);
    surJob.setReducerClass(MListReducer.class);

    surJob.setMapOutputKeyClass(Text.class);
    surJob.setMapOutputValueClass(Text.class);

    surJob.setOutputKeyClass(Text.class);
    surJob.setOutputValueClass(Text.class);

    surJob.setInputFormatClass(TextInputFormat.class);
    surJob.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(surJob, in);
    TextOutputFormat.setOutputPath(surJob, out);

    return surJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.tar.sf.TarDriver.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job job = Job.getInstance(conf);
    job.setJarByClass(TarDriver.class);
    job.setJobName("test");

    job.setMapperClass(SmallFileMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, in);

    job.setOutputFormatClass(TarOutputFormat.class);
    TarOutputFormat.setOutputPath(job, out);

    // job.setNumReduceTasks(0);

    /*
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TarOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    */
    return job.waitForCompletion(true) ? 0 : 1;
}
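
TarOutputFormat appears to be a project-specific OutputFormat (the driver lives in the same com.jbw.tar.sf package tree); the commented-out block preserves an earlier configuration of the same job that declared LongWritable output keys.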

From source file:com.jeffy.mr.WordCount.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) {

    String input = "hdfs://master:8020/tmp/jeffy/input/wordcount.txt";
    String output = "hdfs://master:8020/tmp/jeffy/output";
    Configuration config = new Configuration();
    /**
     * Submitting a job from a Windows client fails without the
     * cross-platform settings below; see
     * http://stackoverflow.com/questions/24075669/mapreduce-job-fail-when-submitted-from-windows-machine
     */
    config.set("mapreduce.app-submission.cross-platform", "true");
    config.set("mapred.remote.os", "Linux");
    try {
        Job job = Job.getInstance(config);
        // Needed when submitting from a Windows client
        job.setJarByClass(WordCount.class);
        // Explicitly point at the packaged job jar
        job.setJar("D:\\bigdata\\mapreduce-demo\\src\\main\\java\\WordCount.jar");
        job.setJobName("Wordcount job");
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        TextInputFormat.setInputPaths(job, new Path(input));
        TextOutputFormat.setOutputPath(job, new Path(output));
        // Submit the job, then poll for progress until the job is complete
        try {
            job.waitForCompletion(true);
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

}
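
The mapreduce.app-submission.cross-platform and mapred.remote.os settings are what allow a job built on a Windows client to run on a Linux cluster; without them the generated container launch commands use the wrong platform conventions and the job fails, which is the problem discussed in the linked Stack Overflow question.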

From source file:com.jet.hadoop.wordcount.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    long timeBegin = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at" + timeBegin);

    if (args == null || args.length == 0) {
        args = new String[2];
        args[0] = "E:\\Work\\input\\hello.txt";
        args[1] = "E:\\Work\\output";
    }

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    //      job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    //      job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    boolean result = job.waitForCompletion(true);

    long timeEnd = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at" + timeEnd);
    System.out.println("hadoop wordcount cost time" + (timeEnd - timeBegin) / 1000 + " seconds.");

    System.exit(result ? 0 : 1);
}
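
GenericOptionsParser consumes the standard Hadoop command-line options (-D, -files, -libjars, and so on) and returns only the remaining application arguments, which is why the driver can treat every remaining argument except the last as an input path.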

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageJob.class.getSimpleName());
    job.setJarByClass(AverageJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Average Job");
    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);
    job.setReducerClass(AverageReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}
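
Two notes on this and the following jobs from the same project: the new Job(Configuration, String) constructor is deprecated in current Hadoop in favor of Job.getInstance(Configuration, String), and because System.exit never returns, the trailing return 0 is dead code; returning the waitForCompletion result directly, as the other drivers on this page do, is the cleaner Tool.run implementation. Also, since the job writes gzip-compressed SequenceFile output, the IntWritable classes declared with setOutputKeyClass and setOutputValueClass become the key and value types of the resulting sequence files.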

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think 
     * Hadoop doesn't have this format [check later], so using NullOutputFormat.class.
     * 
     * In general life gets a little more dangerous when you deviate from MapReduce's input/output 
     * framework and start working with your own files. Your tasks are no longer guaranteed to be idempotent 
     * and you'll need to understand how various failure scenarios can affect your tasks. For example, your files 
     * may only be partially written when some tasks are restarted. Our example here is safe(r) because all the file 
     * operations take place together only once in the close() method and in only one reducer. A more 
     * careful/paranoid implementation would check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}
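
With NullOutputFormat, the Text and BloomFilter classes declared here still type-check the reducer's output, but nothing is written through the normal output path; as the comment above explains, the reducer serializes the BloomFilter to a file itself.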

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
}

From source file:com.jumptap.h2redis.RedisDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        usage();
        return 1;
    }

    Map<String, String> argMap = new HashMap<String, String>();
    String[] kv;

    for (String arg : args) {
        kv = arg.split("=");
        if (kv.length != 2) {
            usage();
            return 1;
        }
        argMap.put(kv[0].trim(), kv[1]);
    }

    Configuration conf = getConf();
    String[] hostPort = argMap.get(REDIS_CMD).split(":");
    conf.set(REDIS_HOST, hostPort[0].trim());
    conf.setInt(REDIS_PORT, Integer.valueOf(hostPort[1].trim()));
    conf.setInt(REDIS_KEY_FIELD, Integer.valueOf(argMap.get(KEY_CMD).trim()));
    conf.setInt(REDIS_HASHKEY_FIELD, Integer.valueOf(argMap.get(HASH_KEY_CMD).trim()));
    conf.setInt(REDIS_HASHVAL_FIELD, Integer.valueOf(argMap.get(HASH_VAL_CMD).trim()));

    if (argMap.containsKey(REDIS_DB_CMD)) {
        conf.set(REDIS_DB, argMap.get(REDIS_DB_CMD).trim());
    }
    if (argMap.containsKey(REDIS_PW_CMD)) {
        conf.set(REDIS_PW, argMap.get(REDIS_PW_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_CMD)) {
        conf.set(REDIS_KEY_PREFIX, argMap.get(KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(HASH_KEY_PFX_CMD)) {
        conf.set(REDIS_HASHKEY_PREFIX, argMap.get(HASH_KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_DELIM_CMD)) {
        conf.set(REDIS_KEY_PREFIX_DELIM, argMap.get(KEY_PFX_DELIM_CMD).trim());
    }
    if (argMap.containsKey(KEY_FILTER_CMD)) {
        conf.setPattern(REDIS_KEY_FILTER, Pattern.compile(argMap.get(KEY_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(HASH_FILTER_CMD)) {
        conf.setPattern(REDIS_HASH_FILTER, Pattern.compile(argMap.get(HASH_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(VAL_FILTER_CMD)) {
        conf.setPattern(REDIS_VAL_FILTER, Pattern.compile(argMap.get(VAL_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(TTL_CMD)) {
        conf.setInt(REDIS_KEY_TTL, Integer.valueOf(argMap.get(TTL_CMD).trim()));
    }
    if (argMap.containsKey(TS_KEY_CMD)) {
        conf.set(REDIS_KEY_TS, argMap.get(TS_KEY_CMD).trim());
    } else {
        conf.set(REDIS_KEY_TS, "redis.lastupdate");
    }

    Job job = new Job(conf, "RedisDriver");
    FileInputFormat.addInputPath(job, new Path(argMap.get(INPUT_CMD)));
    job.setJarByClass(RedisDriver.class);
    job.setMapperClass(RedisOutputMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(RedisOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
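
Because setNumReduceTasks(0) makes this a map-only job, the mapper's output goes straight to RedisOutputFormat, so the Text/Text types declared with setOutputKeyClass and setOutputValueClass describe the map output rather than reduce output.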