Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:cp_c.CP_C.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(CP_C.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
        if (job == null) {
            return -1;
        }/*from  w ww  . j  a va2s  . co m*/

        /*[*/job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(Mapper.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setPartitionerClass(HashPartitioner.class);

        job.setNumReduceTasks(1);
        job.setReducerClass(Reducer.class);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setOutputFormatClass(TextOutputFormat.class);/*]*/

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
        if (job == null) {
            return -1;
        }/*ww w.jav  a  2 s.  c om*/

        job.setMapperClass(StationMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setReducerClass(MultipleOutputsReducer.class);
        job.setOutputKeyClass(NullWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            JobBuilder.printUsage(this, "<ncdc input> <station input> <output>");
            return -1;
        }/*  w  w w  . j av  a 2s. c  o  m*/

        Job job = new Job(getConf(), "Join weather records with station names");
        job.setJarByClass(getClass());

        Path ncdcInputPath = new Path(args[0]);
        Path stationInputPath = new Path(args[1]);
        Path outputPath = new Path(args[2]);

        MultipleInputs.addInputPath(job, ncdcInputPath, TextInputFormat.class, JoinRecordMapper.class);
        MultipleInputs.addInputPath(job, stationInputPath, TextInputFormat.class, JoinStationMapper.class);
        FileOutputFormat.setOutputPath(job, outputPath);

        /*[*/job.setPartitionerClass(KeyPartitioner.class);
        job.setGroupingComparatorClass(TextPair.FirstComparator.class);/*]*/

        job.setMapOutputKeyClass(TextPair.class);

        job.setReducerClass(JoinReducer.class);

        job.setOutputKeyClass(Text.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

public int run(String[] args) throws Exception {
        Job job = new Job(getConf());

        job.setJarByClass(MaxWidgetId.class);

        job.setMapperClass(MaxWidgetMapper.class);
        job.setReducerClass(MaxWidgetReducer.class);

        FileInputFormat.addInputPath(job, new Path("widgets"));
        FileOutputFormat.setOutputPath(job, new Path("maxwidget"));

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Widget.class);

        job.setOutputKeyClass(Widget.class);
        job.setOutputValueClass(NullWritable.class);

        job.setNumReduceTasks(1);//from   w  ww. jav  a  2  s.  c  om

        if (!job.waitForCompletion(true)) {
            return 1; // error.
        }

        return 0;
    }

From source file:cs6240.project.decisiontree.Pseudohigstest.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    DistributedCache.addCacheFile(new URI("s3://hr6240/higs/testing/5/higshistogram"), conf);
    // DistributedCache.addCacheFile(new
    // URI("/home/hraj17/Downloads/part-hig"),conf);
    Job job = new Job(conf, "word count");
    job.setJarByClass(Pseudohigstest.class);
    job.setMapperClass(TestingMapper.class);
    job.setReducerClass(TestingReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setPartitionerClass(TestingPartioner.class);
    job.setNumReduceTasks(2);/*from   w ww.j  a v a 2 s.  c  o  m*/

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cs6240.project.decisiontree.Pseudotestingtwitter.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    DistributedCache.addCacheFile(new URI("s3://hr6240/histogram/5/metadata5"), conf);
    Job job = new Job(conf, "word count");
    job.setJarByClass(Pseudotestingtwitter.class);
    job.setMapperClass(TestingMapper.class);
    job.setReducerClass(TestingReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setPartitionerClass(TestingPartioner.class);
    job.setNumReduceTasks(2);//  w  w  w.  j  ava  2s  . co  m
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:csc555.ebratt.depaul.edu.AverageScoreDriver.java

License:Open Source License

/**
 * //  www.  j a va  2s  .com
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the AverageScoreReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("average score by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), AverageScoreDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(AverageScoreMapper.class);
    job.setReducerClass(AverageScoreReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(AverageScoreDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.AverageScoreRankerDriver.java

License:Open Source License

/**
 * //from www. j  a  v a2 s  . c o  m
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the AverageScoreRankerReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    job.setJobName("average score ranked");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(AverageScoreRankerMapper.class);
    job.setReducerClass(AverageScoreRankerReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreRankerReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The Jar file to run
    job.setJarByClass(AverageScoreRankerDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.GildedCountDriver.java

License:Open Source License

/**
 * //  ww  w  .  ja  v  a  2 s .  c  om
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the GildedCountReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of gilded comments grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), GildedCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildedCountMapper.class);
    job.setReducerClass(LongSumReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(LongSumReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(GildedCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}