Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

This page collects example usages of the setMapOutputKeyClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the key class for the map output data.

Usage

From source file:csc555.ebratt.depaul.edu.GildedSorterDriver.java

License:Open Source License

/**
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the third argument.
 * <p>
 * The job uses a single reducer and
 * {@code LongWritable.DecreasingComparator} so that the output is sorted by
 * key in descending order.
 *
 * @param args
 *            [0] the input directory on HDFS; [1] the output directory on
 *            HDFS; [2] (optional) tells the system whether or not to use a
 *            combiner ("yes") and, if so, it will use the
 *            GildedSorterReducer.class as the combiner.
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "Expected at least <input dir> <output dir>; got " + supplied + " argument(s)");
    }

    Job job = new Job(getConf());
    job.setJobName("sorted gild counts");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildedSorterMapper.class);
    job.setReducerClass(GildedSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner: only when the optional third argument is present and "yes"
    if (supplied > 2 && "yes".equals(args[2])) {
        job.setCombinerClass(GildedSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The Jar file to run
    job.setJarByClass(GildedSorterDriver.class);

    // Report the outcome to the caller instead of terminating the JVM here,
    // so the returned status actually reflects success or failure.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass1.java

License:Open Source License

/**
 * /*www .jav  a 2 s  . com*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the GildPercentReducerPass1.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("gild percent of: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), GildPercentDriverPass1.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildPercentMapperPass1.class);
    job.setReducerClass(GildPercentReducerPass1.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass1.class);
    }

    // The Jar file to run
    job.setJarByClass(GildPercentDriverPass1.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass2.java

License:Open Source License

/**
 * /*from w ww  . j a  v  a2 s. c om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the GildPercentReducerPass2.class as
 *            the combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted gild percent");
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildPercentMapperPass2.class);
    job.setReducerClass(GildPercentReducerPass2.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass2.class);
    }

    // sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The Jar file to run
    job.setJarByClass(GildPercentDriverPass2.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.RCTop10Driver.java

License:Open Source License

/**
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments.
 * <p>
 * The job is named "Top 10 Reddit", uses a single reducer, and installs a
 * custom partitioner and grouping comparator for the
 * {@code GroupByCountPair} map output key.
 *
 * @param args
 *            [0] the input directory on HDFS; [1] the output directory on
 *            HDFS
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             if the job cannot be configured or run
 */
public int run(String[] args) throws Exception {

    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "Expected <input dir> <output dir>; got " + supplied + " argument(s)");
    }

    Job job = new Job(getConf(), "Top 10 Reddit");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // ensure 1 reduce tasks for ranking
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCTop10Mapper.class);
    job.setReducerClass(RCTop10Reducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(GroupByCountPair.class);
    job.setMapOutputValueClass(Text.class);

    // set custom partitioner
    job.setPartitionerClass(GroupByCountPairPartitioner.class);

    // set custom grouping comparator
    job.setGroupingComparatorClass(GroupByGroupingComparator.class);

    // input class
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(GroupByCountPair.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The Jar file to run
    job.setJarByClass(RCTop10Driver.class);

    // Report the outcome to the caller instead of terminating the JVM here,
    // so the returned status actually reflects success or failure.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.RCWordCountAcronymsDriver.java

License:Open Source License

/**
 * /*from  w w w.  j av  a2s  . co m*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the RCWordCountReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of acronyms in: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountAcronymsDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(RCWordCountAcronymsDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.RCWordCountDriver.java

License:Open Source License

/**
 * /*w ww .  j  a v  a 2 s. c  om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the RCWordCountReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 5);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(RCWordCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.VoteCountDriver.java

License:Open Source License

/**
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the third argument.
 * <p>
 * The job name includes the value of the "groupBy" configuration property,
 * and the number of reduce tasks is set to twice the number of task
 * trackers reported by the cluster.
 *
 * @param args
 *            [0] the input directory on HDFS; [1] the output directory on
 *            HDFS; [2] (optional) tells the system whether or not to use a
 *            combiner ("yes") and, if so, it will use the
 *            LongSumReducer.class (the same class as the reducer) as the
 *            combiner.
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "Expected at least <input dir> <output dir>; got " + supplied + " argument(s)");
    }

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    job.setJobName("count of votes grouped by: " + groupBy);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- two reducers per task tracker reported by the cluster.
    // The JobClient is only needed for this query, so close it afterwards.
    JobConf jobConf = new JobConf(getConf(), VoteCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    try {
        ClusterStatus cluster = jobClient.getClusterStatus();
        job.setNumReduceTasks(cluster.getTaskTrackers() * 2);
    } finally {
        jobClient.close();
    }

    // Mapper and Reducer Classes to use
    job.setMapperClass(VoteCountMapper.class);
    job.setReducerClass(LongSumReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner: only when the optional third argument is present and "yes"
    if (supplied > 2 && "yes".equals(args[2])) {
        job.setCombinerClass(LongSumReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(VoteCountDriver.class);

    // Report the outcome to the caller instead of terminating the JVM here,
    // so the returned status actually reflects success or failure.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.VoteSorterDriver.java

License:Open Source License

/**
 * /*  w w  w . j  a  v a  2 s  .c  om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the VoteSorterReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted vote counts");
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(VoteSorterMapper.class);
    job.setReducerClass(VoteSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(VoteSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The Jar file to run
    job.setJarByClass(VoteSorterDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:de.bankmark.bigbench.queries.q18.MRlinearRegression.java

License:Apache License

/**
 * Configures and runs the linear-regression job.
 * <p>
 * Expects exactly two arguments; otherwise {@code usage(job)} is called and
 * 2 is returned without running the job.
 *
 * @param args
 *            [0] the input path; [1] the output path
 * @return 2 when the argument count is wrong, 0 when the job completes
 *         successfully, 1 otherwise
 * @throws Exception
 *             if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {

    final int reducerCount = 1;
    final Job lrJob = Job.getInstance(getConf());
    lrJob.setJarByClass(MRlinearRegression.class);

    // Anything other than <input> <output> is a usage error
    if (args.length != 2) {
        usage(lrJob);
        return 2;
    }

    System.out.println("input:");
    final String jobName = MRlinearRegression.class.getSimpleName() + "::" + args[0] + "->" + args[1];
    lrJob.setJobName(jobName);

    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);
    System.out.println("Input: " + source + "  out -> " + target);
    FileInputFormat.addInputPath(lrJob, source);
    FileOutputFormat.setOutputPath(lrJob, target);

    // Map side: mapper class and the types it emits
    lrJob.setMapperClass(MRlinearRegression.LRmapper.class);
    lrJob.setMapOutputKeyClass(IntWritable.class);
    lrJob.setMapOutputValueClass(DoubleArrayWritable.class);

    // Reduce side
    lrJob.setReducerClass(MRlinearRegression.LRreducer.class);
    lrJob.setNumReduceTasks(reducerCount);

    final boolean completed = lrJob.waitForCompletion(true);
    if (completed) {
        return 0;
    }
    return 1;
}

From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java

License:Apache License

/**
 * Configures and runs a job that writes its input out through
 * {@code SequenceFileOutputFormat}.
 * <p>
 * Expects exactly two arguments; otherwise {@code usage(job)} is called and
 * 2 is returned without running the job. The number of reduce tasks is set
 * to 0.
 *
 * @param args
 *            [0] the input path; [1] the output path
 * @return 2 when the argument count is wrong, 0 when the job completes
 *         successfully, 1 otherwise
 * @throws Exception
 *             if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {

    final Job convertJob = Job.getInstance(getConf());
    convertJob.setJarByClass(ToSequenceFile.class);

    // Anything other than <input> <output> is a usage error
    if (args.length != 2) {
        usage(convertJob);
        return 2;
    }

    System.out.println("input:");
    final String jobName = ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1];
    convertJob.setJobName(jobName);

    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);
    System.out.println("Input: " + source + "  out -> " + target);
    FileInputFormat.addInputPath(convertJob, source);
    SequenceFileOutputFormat.setOutputPath(convertJob, target);

    // Mapper / reducer classes
    convertJob.setMapperClass(IdentityMapper.class);
    convertJob.setReducerClass(Reducer.class);

    // Key/value types for map output and final output
    convertJob.setMapOutputKeyClass(Text.class);
    convertJob.setMapOutputValueClass(Text.class);
    convertJob.setOutputKeyClass(Text.class);
    convertJob.setOutputValueClass(Text.class);

    // No reduce tasks; output goes through the sequence-file format
    convertJob.setNumReduceTasks(0);
    convertJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    final boolean completed = convertJob.waitForCompletion(true);
    if (completed) {
        return 0;
    }
    return 1;
}