Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the key class for the job output data.

Usage

From source file:WordCount_SiCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from ww  w  .j  a  v a 2  s.  com
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);

    // disable combiner
    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartition.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:GetRetweetersAndCountPerUser.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);//from w  w  w .jav  a  2s.  com
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(args[1]), conf);
        Path dir = new Path(args[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = new String[2];
                columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}

From source file:WordCount_PerMapTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//  w  w w  .  j  av a  2  s .  co m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner
    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:SiCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from  w w  w  .j  a va2 s. c  o m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called*/
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:PerMapTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/*from w w w . j  a  v  a2  s.  c om*/
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called*/
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 3 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 3 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:WordCountCounters.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE,
            WordCountCounters.COUNTER_COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate()
            .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);//from www  .j  a v  a  2  s  .c  om
    return 0;
}

From source file:ImageDuplicatesRemover.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    //This is the line that makes the hadoop run locally
    //conf.set("mapred.job.tracker", "local");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from w  w  w  .j  av  a 2  s. c om
    }
    Job job = new Job(conf, "image dups remover");
    job.setJarByClass(ImageDuplicatesRemover.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ImageMd5Mapper.class);
    job.setReducerClass(ImageDupsReducer.class);
    //job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:ReverseIndexer.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
        System.exit(2);//from  ww w . j  av a 2 s.  c o  m
    }
    Job job = new Job(conf, "reverse indexer");
    job.setJarByClass(ReverseIndexer.class);
    job.setMapperClass(IndexerMapper.class);
    job.setReducerClass(IndexerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LineRecWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    for (int i = 1; i < otherArgs.length; i++) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:Authset.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);//from   w ww. j av a 2  s  .c  o  m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(Authset.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(10);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    //DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(),
    //job.getConfiguration());

    //DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:RHBlockToKeyRangeIndex.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);/*from   w  ww  .j a  va2s . co m*/
    }
    Job job = new Job(conf, "rhblockindex");
    job.setJarByClass(RHBlockToKeyRangeIndex.class);
    job.setMapperClass(RMapper.class);
    job.setCombinerClass(RReducer.class);
    job.setReducerClass(RReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}