List of usage examples for the org.apache.hadoop.mapreduce.Job method setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:cp_c.CP_C.java
/**
 * Configures and submits the "word count" job, then exits the JVM with the
 * job's status (0 when {@code waitForCompletion} reports success, 1 otherwise).
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException
    // when the input/output paths are missing.
    if (args.length < 2) {
        System.err.println("Usage: CP_C <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(CP_C.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);

    // Intermediate (map output) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Final (job output) key/value types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Builds a job from the command-line arguments and spells out every stage
 * setting explicitly: input format, mapper, map output types, partitioner,
 * reducer count, reducer, job output types and output format.
 *
 * @param args arguments forwarded to {@code JobBuilder.parseInputAndOutput}
 * @return -1 when no job could be built from the arguments, 0 when the job
 *         completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }

    // Input side.
    job.setInputFormatClass(TextInputFormat.class);

    // Map stage and its intermediate types.
    job.setMapperClass(Mapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Shuffle: how keys are assigned to reducers.
    job.setPartitionerClass(HashPartitioner.class);

    // Reduce stage and final output types.
    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output side.
    job.setOutputFormatClass(TextOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Builds a job from the command-line arguments that maps with
 * {@code StationMapper} and reduces with {@code MultipleOutputsReducer}.
 *
 * @param args arguments forwarded to {@code JobBuilder.parseInputAndOutput}
 * @return -1 when no job could be built from the arguments, 0 when the job
 *         completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }

    // Map stage: intermediate keys are Text.
    job.setMapperClass(StationMapper.class);
    job.setMapOutputKeyClass(Text.class);

    // Reduce stage: the job's declared output key type is NullWritable.
    job.setReducerClass(MultipleOutputsReducer.class);
    job.setOutputKeyClass(NullWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Configures and runs the "Join weather records with station names" job.
 * Two inputs are registered, each with its own mapper, and the job uses a
 * custom partitioner and grouping comparator on the {@code TextPair} map
 * output key.
 *
 * @param args {@code args[0]} NCDC input path, {@code args[1]} station input
 *             path, {@code args[2]} output path
 * @return -1 when the argument count is not exactly three, 0 when the job
 *         completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        JobBuilder.printUsage(this, "<ncdc input> <station input> <output>");
        return -1;
    }

    Job job = new Job(getConf(), "Join weather records with station names");
    job.setJarByClass(getClass());

    // One mapper per input source.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, JoinRecordMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, JoinStationMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    // Partitioning and grouping are customised for the composite TextPair key.
    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(TextPair.FirstComparator.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setReducerClass(JoinReducer.class);
    job.setOutputKeyClass(Text.class);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:crunch.MaxTemperature.java
License:Apache License
public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(MaxWidgetId.class); job.setMapperClass(MaxWidgetMapper.class); job.setReducerClass(MaxWidgetReducer.class); FileInputFormat.addInputPath(job, new Path("widgets")); FileOutputFormat.setOutputPath(job, new Path("maxwidget")); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Widget.class); job.setOutputKeyClass(Widget.class); job.setOutputValueClass(NullWritable.class); job.setNumReduceTasks(1);//from w ww. jav a 2 s. c om if (!job.waitForCompletion(true)) { return 1; // error. } return 0; }
From source file:cs6240.project.decisiontree.Pseudohigstest.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); DistributedCache.addCacheFile(new URI("s3://hr6240/higs/testing/5/higshistogram"), conf); // DistributedCache.addCacheFile(new // URI("/home/hraj17/Downloads/part-hig"),conf); Job job = new Job(conf, "word count"); job.setJarByClass(Pseudohigstest.class); job.setMapperClass(TestingMapper.class); job.setReducerClass(TestingReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setPartitionerClass(TestingPartioner.class); job.setNumReduceTasks(2);/*from w ww.j a v a 2 s. c o m*/ FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:cs6240.project.decisiontree.Pseudotestingtwitter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); DistributedCache.addCacheFile(new URI("s3://hr6240/histogram/5/metadata5"), conf); Job job = new Job(conf, "word count"); job.setJarByClass(Pseudotestingtwitter.class); job.setMapperClass(TestingMapper.class); job.setReducerClass(TestingReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setPartitionerClass(TestingPartioner.class); job.setNumReduceTasks(2);// w w w. j ava 2s . co m FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:csc555.ebratt.depaul.edu.AverageScoreDriver.java
License:Open Source License
/** * // www. j a va 2s .com * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the AverageScoreReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("average score by: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), AverageScoreDriver.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 2); // Mapper and Reducer Classes to use job.setMapperClass(AverageScoreMapper.class); job.setReducerClass(AverageScoreReducer.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(DoubleWritable.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(AverageScoreReducer.class); } // The Jar file to run job.setJarByClass(AverageScoreDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.AverageScoreRankerDriver.java
License:Open Source License
/** * //from www. j a v a2 s . c o m * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the AverageScoreRankerReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJobName("average score ranked"); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(1); // Mapper and Reducer Classes to use job.setMapperClass(AverageScoreRankerMapper.class); job.setReducerClass(AverageScoreRankerReducer.class); // Mapper output classes job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(AverageScoreRankerReducer.class); } // sort in descending order job.setSortComparatorClass(DoubleWritableDescendingComparator.class); // The Jar file to run job.setJarByClass(AverageScoreRankerDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.GildedCountDriver.java
License:Open Source License
/** * // ww w . ja v a 2 s . c om * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the GildedCountReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("count of gilded comments grouped by: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), GildedCountDriver.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 2); // Mapper and Reducer Classes to use job.setMapperClass(GildedCountMapper.class); job.setReducerClass(LongSumReducer.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(LongSumReducer.class); } // The Jar file to run job.setJarByClass(GildedCountDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }