Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

This page collects example usages of the setMapOutputKeyClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:csc555.ebratt.depaul.edu.GildedSorterDriver.java

License:Open Source License

/**
 * Configures and runs the "sorted gild counts" job.
 *
 * <p>The job uses a single reduce task and
 * {@code LongWritable.DecreasingComparator} as its sort comparator, so
 * keys are sorted in descending order.
 *
 * @param args
 *            command-line arguments:
 *            <ul>
 *            <li>[0] the input directory on HDFS</li>
 *            <li>[1] the output directory on HDFS</li>
 *            <li>[2] optional; when equal to "yes" the
 *            GildedSorterReducer.class is also used as the combiner</li>
 *            </ul>
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             propagated from the Hadoop job setup or execution calls
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "expected at least 2 arguments (input dir, output dir) but got " + supplied);
    }

    Job job = new Job(getConf());
    job.setJobName("sorted gild counts");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildedSorterMapper.class);
    job.setReducerClass(GildedSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner: the third argument is optional, so only read it when present
    if (supplied > 2 && "yes".equals(args[2])) {
        job.setCombinerClass(GildedSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The Jar file to run
    job.setJarByClass(GildedSorterDriver.class);

    // Hand the outcome back to the caller instead of terminating the JVM
    // here; the caller is expected to pass it on to System.exit.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass1.java

License:Open Source License

/**
 * /*www .jav  a 2 s  . com*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the GildPercentReducerPass1.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("gild percent of: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), GildPercentDriverPass1.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildPercentMapperPass1.class);
    job.setReducerClass(GildPercentReducerPass1.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass1.class);
    }

    // The Jar file to run
    job.setJarByClass(GildPercentDriverPass1.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass2.java

License:Open Source License

/**
 * /*from w ww  . j a  v  a2 s. c om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the GildPercentReducerPass2.class as
 *            the combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted gild percent");
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(GildPercentMapperPass2.class);
    job.setReducerClass(GildPercentReducerPass2.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass2.class);
    }

    // sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The Jar file to run
    job.setJarByClass(GildPercentDriverPass2.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.RCTop10Driver.java

License:Open Source License

/**
 * Configures and runs the "Top 10 Reddit" job.
 *
 * <p>The job uses a single reduce task, a custom partitioner
 * ({@code GroupByCountPairPartitioner}) and a custom grouping comparator
 * ({@code GroupByGroupingComparator}) on {@code GroupByCountPair} keys.
 *
 * @param args
 *            command-line arguments:
 *            <ul>
 *            <li>[0] the input directory on HDFS</li>
 *            <li>[1] the output directory on HDFS</li>
 *            </ul>
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             propagated from the Hadoop job setup or execution calls
 */
public int run(String[] args) throws Exception {

    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "expected at least 2 arguments (input dir, output dir) but got " + supplied);
    }

    Job job = new Job(getConf(), "Top 10 Reddit");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // ensure 1 reduce task for ranking
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCTop10Mapper.class);
    job.setReducerClass(RCTop10Reducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(GroupByCountPair.class);
    job.setMapOutputValueClass(Text.class);

    // set custom partitioner
    job.setPartitionerClass(GroupByCountPairPartitioner.class);

    // set custom grouping comparator
    job.setGroupingComparatorClass(GroupByGroupingComparator.class);

    // input class
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(GroupByCountPair.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The Jar file to run
    job.setJarByClass(RCTop10Driver.class);

    // Hand the outcome back to the caller instead of terminating the JVM
    // here; the caller is expected to pass it on to System.exit.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.RCWordCountAcronymsDriver.java

License:Open Source License

/**
 * /*from  w w w.  j av  a2s  . co m*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the RCWordCountReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of acronyms in: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountAcronymsDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(RCWordCountAcronymsDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.RCWordCountDriver.java

License:Open Source License

/**
 * /*w ww .  j  a v  a 2 s. c  om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the RCWordCountReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 5);

    // Mapper and Reducer Classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(RCWordCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.VoteCountDriver.java

License:Open Source License

/**
 * Configures and runs the "count of votes grouped by: &lt;groupBy&gt;"
 * job, where groupBy is read from the "groupBy" configuration property.
 *
 * <p>The number of reduce tasks is set to twice the task-tracker count
 * reported by the cluster status. {@code LongSumReducer} is the reducer.
 *
 * @param args
 *            command-line arguments:
 *            <ul>
 *            <li>[0] the input directory on HDFS</li>
 *            <li>[1] the output directory on HDFS</li>
 *            <li>[2] optional; when equal to "yes" the
 *            LongSumReducer.class is also used as the combiner</li>
 *            </ul>
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException
 *             if fewer than two arguments are supplied
 * @throws Exception
 *             propagated from the Hadoop job setup or execution calls
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
    int supplied = (args == null) ? 0 : args.length;
    if (supplied < 2) {
        throw new IllegalArgumentException(
                "expected at least 2 arguments (input dir, output dir) but got " + supplied);
    }

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    job.setJobName("count of votes grouped by: " + groupBy);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), VoteCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    try {
        ClusterStatus cluster = jobClient.getClusterStatus();
        job.setNumReduceTasks(cluster.getTaskTrackers() * 2);
    } finally {
        // the client is only needed for the status query; release it
        jobClient.close();
    }

    // Mapper and Reducer Classes to use
    job.setMapperClass(VoteCountMapper.class);
    job.setReducerClass(LongSumReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner: the third argument is optional, so only read it when present
    if (supplied > 2 && "yes".equals(args[2])) {
        job.setCombinerClass(LongSumReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(VoteCountDriver.class);

    // Hand the outcome back to the caller instead of terminating the JVM
    // here; the caller is expected to pass it on to System.exit.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:csc555.ebratt.depaul.edu.VoteSorterDriver.java

License:Open Source License

/**
 * /*  w w  w . j  a  v a  2 s  .c  om*/
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the 4th argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the VoteSorterReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted vote counts");
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(VoteSorterMapper.class);
    job.setReducerClass(VoteSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(VoteSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The Jar file to run
    job.setJarByClass(VoteSorterDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:de.bankmark.bigbench.queries.q18.MRlinearRegression.java

License:Apache License

/**
 * Builds and runs the linear-regression job with a single reduce task.
 *
 * @param args
 *            exactly two entries: [0] the input path, [1] the output path
 * @return 2 (after calling {@code usage}) when the argument count is not
 *         two; otherwise 0 if the job completed successfully, 1 if not
 * @throws Exception
 *             propagated from the Hadoop job setup or execution calls
 */
@Override
public int run(String[] args) throws Exception {
    final int reducerCount = 1;

    Job lrJob = Job.getInstance(getConf());
    lrJob.setJarByClass(MRlinearRegression.class);

    // Anything other than <input> <output> is a usage error.
    if (args.length != 2) {
        usage(lrJob);
        return 2;
    }

    System.out.println("input:");
    String jobName = MRlinearRegression.class.getSimpleName() + "::" + args[0] + "->" + args[1];
    lrJob.setJobName(jobName);

    // Wire up the input and output locations.
    Path source = new Path(args[0]);
    Path destination = new Path(args[1]);
    System.out.println("Input: " + source + "  out -> " + destination);
    FileInputFormat.addInputPath(lrJob, source);
    FileOutputFormat.setOutputPath(lrJob, destination);

    // Map side: mapper class and its intermediate key/value types.
    lrJob.setMapperClass(MRlinearRegression.LRmapper.class);
    lrJob.setMapOutputKeyClass(IntWritable.class);
    lrJob.setMapOutputValueClass(DoubleArrayWritable.class);

    // Reduce side.
    lrJob.setReducerClass(MRlinearRegression.LRreducer.class);
    lrJob.setNumReduceTasks(reducerCount);

    boolean completed = lrJob.waitForCompletion(true);
    if (completed) {
        return 0;
    }
    return 1;
}

From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java

License:Apache License

/**
 * Builds and runs a job with zero reduce tasks that reads the input path
 * and writes it using {@code SequenceFileOutputFormat}, with {@code Text}
 * keys and values.
 *
 * @param args
 *            exactly two entries: [0] the input path, [1] the output path
 * @return 2 (after calling {@code usage}) when the argument count is not
 *         two; otherwise 0 if the job completed successfully, 1 if not
 * @throws Exception
 *             propagated from the Hadoop job setup or execution calls
 */
@Override
public int run(String[] args) throws Exception {
    Job convertJob = Job.getInstance(getConf());
    convertJob.setJarByClass(ToSequenceFile.class);

    // Anything other than <input> <output> is a usage error.
    if (args.length != 2) {
        usage(convertJob);
        return 2;
    }

    System.out.println("input:");
    String jobName = ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1];
    convertJob.setJobName(jobName);

    // Wire up the input and output locations.
    Path source = new Path(args[0]);
    Path destination = new Path(args[1]);
    System.out.println("Input: " + source + "  out -> " + destination);
    FileInputFormat.addInputPath(convertJob, source);
    SequenceFileOutputFormat.setOutputPath(convertJob, destination);

    // Key/value types for both the map output and the final output.
    convertJob.setMapOutputKeyClass(Text.class);
    convertJob.setMapOutputValueClass(Text.class);
    convertJob.setOutputKeyClass(Text.class);
    convertJob.setOutputValueClass(Text.class);

    // NOTE(review): a reducer class is registered but the reduce-task count
    // is zero, so it presumably never runs -- confirm.
    convertJob.setMapperClass(IdentityMapper.class);
    convertJob.setReducerClass(Reducer.class);
    convertJob.setNumReduceTasks(0);

    convertJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean completed = convertJob.waitForCompletion(true);
    if (completed) {
        return 0;
    }
    return 1;
}