List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
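A combiner set with setCombinerClass is just a Reducer that the framework may apply to map output before the shuffle, zero or more times, so the operation must be commutative and associative, and the combiner's output key/value types must match its input types (which are the map output types). A minimal illustrative sketch follows; MaxCombiner is a made-up name, not from the examples below:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative combiner: keeps the per-key maximum. Max is commutative
// and associative, so applying it any number of times before the real
// reducer cannot change the final result.
public class MaxCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    private final LongWritable result = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        for (LongWritable value : values) {
            max = Math.max(max, value.get());
        }
        result.set(max);
        context.write(key, result);
    }
}

Wired into a job with job.setCombinerClass(MaxCombiner.class), alongside a reducer that consumes Text/LongWritable pairs.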
From source file:com.github.ygf.pagerank.InLinks.java
License:Apache License
private void computeInLinks(Configuration conf, Path linksFile, Path outputDir) throws Exception {
    // This job computes the number of in-links for every page. The
    // implementation is very similar to the classic word count example.
    Job job = Job.getInstance(conf, "InLinks:Computation");
    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(InLinksMapper.class);
    job.setCombinerClass(InLinksReducer.class);
    job.setReducerClass(InLinksReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, linksFile);
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks"));

    job.waitForCompletion(true);
}
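Note that this job reuses InLinksReducer as the combiner. That is safe only because the reduce operation (summing in-link counts) is commutative and associative, and because the reducer's output types, (IntWritable, IntWritable), match its input types; a reducer computing a mean, for example, could not be reused this way. The same reuse pattern appears in most of the examples below.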
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job performs an iteration of the power iteration method to
    // compute PageRank. The map task processes each block M_{i,j}, loads
    // the corresponding stripe j of the vector v_{k-1} and produces the
    // partial result of the stripe i of the vector v_k. The reduce task
    // sums all the partial results of v_k and adds the teleportation factor
    // (the combiner only sums all the partial results). See Section 5.2
    // (and 5.2.3 in particular) of Mining of Massive Datasets
    // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The
    // output is written in a "vk" subdir of the output dir, where k is the
    // iteration number. MapFileOutputFormat is used to keep an array of the
    // stripes of v.
    Job job = Job.getInstance(conf, "PageRank:Iteration");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankIterationMapper.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setCombinerClass(PageRankIterationCombiner.class);
    job.setReducerClass(PageRankIterationReducer.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(ShortWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "M"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter));

    job.waitForCompletion(true);
}
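Here, unlike the other examples on this page, the combiner is a different class from the reducer: only the associative summing step is safe to apply repeatedly, while the teleportation factor must be added exactly once, in the reducer. PageRankIterationCombiner and FloatArrayWritable belong to this project and are not shown on this page; below is a hypothetical sketch of such a combiner, assuming FloatArrayWritable follows the ArrayWritable get()/set() convention over FloatWritable elements:

import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical sketch, not the project's actual source: sums partial
// stripes of v_k element-wise; adding the teleportation factor is left
// to the reducer so it happens exactly once per stripe.
public class PageRankIterationCombiner
        extends Reducer<ShortWritable, FloatArrayWritable, ShortWritable, FloatArrayWritable> {

    @Override
    protected void reduce(ShortWritable key, Iterable<FloatArrayWritable> values, Context context)
            throws IOException, InterruptedException {
        float[] sum = null;
        for (FloatArrayWritable value : values) {
            Writable[] stripe = value.get(); // assumed ArrayWritable-style accessor
            if (sum == null) {
                sum = new float[stripe.length];
            }
            for (int i = 0; i < stripe.length; i++) {
                sum[i] += ((FloatWritable) stripe[i]).get();
            }
        }
        FloatWritable[] result = new FloatWritable[sum.length];
        for (int i = 0; i < sum.length; i++) {
            result[i] = new FloatWritable(sum[i]);
        }
        FloatArrayWritable out = new FloatArrayWritable();
        out.set(result); // assumed ArrayWritable-style mutator
        context.write(key, out);
    }
}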
From source file:com.hadoop.mapreduce.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // Note: this example parses a hard-coded argument array instead of
    // the real command-line arguments.
    String[] ioArgs = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    // job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    // map, combine, reduce
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
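TokenizerMapper and IntSumReducer here are the classes from the stock Hadoop WordCount example; IntSumReducer can double as the combiner because its input and output types are both (Text, IntWritable) and integer addition is commutative and associative. For reference, the stock implementations are essentially:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    private static final IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (word, 1) for every token in the input line.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}

public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the partial counts; used both as combiner and as reducer.
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}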
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}
From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException {
    Job client = new Job(new Configuration());
    client.setJarByClass(ProcessTweets.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input_concat"));
    TextOutputFormat.setOutputPath(client, new Path("output2"));
    client.setMapperClass(ProcessTweetsMapper.class);
    client.setReducerClass(ProcessTweetsReducer.class);
    client.setCombinerClass(ProcessTweetsReducer.class);
    try {
        client.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.huihui.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
     * Hadoop does not manage its configuration through java.util.Properties
     * or Apache Jakarta Commons Configuration; it uses its own API,
     * org.apache.hadoop.conf.Configuration.
     */
    Configuration conf = new Configuration();
    /*
     * Hadoop's GenericOptionsParser handles generic options such as
     * -D mapreduce.job.queuename (also -fs, -jt, -libjars, -files,
     * -archives and -tokenCacheFile); getRemainingArgs() returns whatever
     * is left over, e.g. "xrli/STJoin_in" and "xrli/STJoin_out", in otherArgs.
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    // Set the mapper, combiner and reducer classes.
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // A Path wraps a URI; here the input and output paths are given as URI strings.
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    // Wait for the job to finish and exit with its status.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hzy.test.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // String input = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/";
    // String output = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/wc/";
    String input = "/tmp/data.txt";
    // String input = args[0];
    String output = "/tmp/t1";
    // String output = args[1];

    Configuration conf = HdfsDAO.config();
    // conf.set("mapreduce.framework.name", "yarn");
    // conf.set("hbase.zookeeper.quorum", "hadoop01:2181");
    // conf.set("fs.default.name", "hdfs://hadoop01:9000");
    // conf.set("yarn.resourcemanager.resource-tracker.address", "hadoop01:8031");
    // conf.set("yarn.resourcemanager.address", "hadoop01:8032");
    // conf.set("yarn.resourcemanager.scheduler.address", "hadoop01:8030");
    // conf.set("yarn.resourcemanager.admin.address", "hadoop01:8033");
    // conf.set("mapreduce.jobhistory.address", "hadoop01:10020");
    // conf.set("mapreduce.jobhistory.webapp.address", "hadoop01:19888");

    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length < 2) {
    //     System.err.println("Usage: wordcount <in> [<in>...] <out>");
    //     System.exit(2);
    // }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(input));
    // }
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ifeng.hadoop.thinker.LogDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileUtil.fullyDelete(new File(args[1]));

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
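This driver and the two that follow implement run(String[]) from Hadoop's Tool interface. A driver written this way is usually launched through ToolRunner, which parses the generic options before run() sees the arguments; a typical main method (not shown in this source) would be:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-D, -files, -libjars, ...) into the
    // Configuration and passes the remaining arguments to run().
    System.exit(ToolRunner.run(new Configuration(), new LogDriver(), args));
}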
From source file:com.ifeng.hadoop.thinker.LogLocalDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.ifeng.vdn.loggroup.mapper.VideologGroupDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    // Note: unlike the previous two drivers, this one does not call setJarByClass.
    Job job = Job.getInstance(super.getConf());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(VideoLogGroupMapper.class);
    job.setReducerClass(VideologGroupReducer.class);
    job.setCombinerClass(VideologGroupReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}