List of usage examples for org.apache.hadoop.mapred JobConf setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
From source file:buildtestproject.MyFirstMapReduce.java
public static void main(String[] args) throws Exception { //Configuration conf = new Configuration(); JobConf conf = new JobConf(MyFirstMapReduce.class); //Job job = Job.getInstance(conf, "word-count-one"); conf.setJobName("word-count-one"); conf.setMapperClass(TokenizerMapper.class); conf.setCombinerClass(IntSumReducer.class); conf.setReducerClass(IntSumReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // job.setJarByClass(MyFirstMapReduce.class); // job.setMapperClass(TokenizerMapper.class); // job.setCombinerClass(IntSumReducer.class); // job.setReducerClass(IntSumReducer.class); // /*from w ww . jav a2s.c om*/ // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); // FileInputFormat.addInputPath(job, new Path(args[0])); // FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(conf); // System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:cn.edu.xmu.dm.mapreduce.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();/* w ww. j a v a2 s .c o m*/ return 1; } JobConf job = new JobConf(getConf(), MultiFileWordCount.class); job.setJobName("MultiFileWordCount"); // set the InputFormat of the job to our InputFormat job.setInputFormat(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(LongWritable.class); // use the defined mapper job.setMapperClass(MapClass.class); // use the WordCount Reducer job.setCombinerClass(LongSumReducer.class); job.setReducerClass(LongSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(job); return 0; }
From source file:com.acme.extensions.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { JobConf job = new JobConf(); String[] otherArgs = new GenericOptionsParser(job, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);/*from ww w. j a va2 s .co m*/ } job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); JobClient.runJob(job); }
From source file:com.aerospike.hadoop.examples.wordcountinput.WordCountInput.java
License:Apache License
public int run(final String[] args) throws Exception { log.info("run starting"); final Configuration conf = getConf(); JobConf job = new JobConf(conf, WordCountInput.class); job.setJobName("AerospikeWordCountInput"); job.setInputFormat(AerospikeInputFormat.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormat(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[0])); JobClient.runJob(job);/*from ww w .j a va2s. c om*/ log.info("finished"); return 0; }
From source file:com.aerospike.hadoop.examples.wordcountoutput.WordCountOutput.java
License:Apache License
public int run(final String[] args) throws Exception { log.info("run starting"); final Configuration conf = getConf(); JobConf job = new JobConf(conf, WordCountOutput.class); job.setJobName("AerospikeWordCountOutput"); for (int ii = 0; ii < args.length; ++ii) { FileInputFormat.addInputPath(job, new Path(args[ii])); }/*from w ww . ja v a 2 s. c om*/ job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormat(MyOutputFormat.class); JobClient.runJob(job); log.info("finished"); return 0; }
From source file:com.benchmark.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 4) { System.out.println("Grep <inDir> <outDir> <numreduces> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return -1; }//from ww w.j av a2s .co m Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); JobConf grepJob = new JobConf(getConf(), Grep.class); try { Date startIteration = new Date(); grepJob.setJobName("grep-search"); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.set("mapred.mapper.regex", args[3]); if (args.length == 5) grepJob.set("mapred.mapper.regex.group", args[4]); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, new Path(args[1])); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); grepJob.setNumReduceTasks(Integer.parseInt(args[2])); JobClient.runJob(grepJob); Date endIteration = new Date(); System.out.println("The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds."); } finally { FileSystem.get(grepJob).delete(tempDir, true); } return 0; }
From source file:com.benchmark.mapred.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();//from ww w . j a v a2 s. c o m return 1; } JobConf job = new JobConf(getConf(), MultiFileWordCount.class); job.setJobName("MultiFileWordCount"); //set the InputFormat of the job to our InputFormat job.setInputFormat(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(LongWritable.class); //use the defined mapper job.setMapperClass(MapClass.class); //use the WordCount Reducer job.setCombinerClass(LongSumReducer.class); job.setReducerClass(LongSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(job); return 0; }
From source file:com.chriscx.mapred.Driver.java
public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), Driver.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { if ("-skip".equals(args[i])) { DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); conf.setBoolean("wordcount.skip.patterns", true); } else {//from www .jav a2s . com other_args.add(args[i]); } } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:com.chriscx.matching.Driver.java
public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class); conf.setJobName("Matching"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { if ("-skip".equals(args[i])) { DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); conf.setBoolean("wordcount.skip.patterns", true); } else {//from w w w. j av a 2s. c o m other_args.add(args[i]); } } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:com.cloudera.hbase.OldWordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// ww w. j a va 2 s.c o m } conf.setJobName("word count"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1])); JobClient.runJob(conf); return; }