Java tutorial
/*
 * Driver for the reverse (inverted) index job, using the classic
 * org.apache.hadoop.mapred API.
 */
package reverseIndexer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * @author madhatter
 */
public class reverseIndexer {

    public static void startJob(String[] args) {
        try {
            /* Equivalent setup with the newer org.apache.hadoop.mapreduce API,
               kept here for reference:

            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length < 2) {
                System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
                System.exit(2);
            }
            Job job = new Job(conf, "reverse indexer");
            job.setJarByClass(reverseIndexer.class);
            job.setMapperClass(IndexerMapper.class);
            job.setReducerClass(IndexerReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LineRecWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            for (int i = 1; i < otherArgs.length; i++) {
                FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
            }
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
            */

            // Configure the job against this driver class (not the WordCount example).
            JobConf conf = new JobConf(reverseIndexer.class);
            conf.setJobName("reverse indexer");

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            // Pass the mapper/reducer classes directly; there is no need to
            // instantiate them and cast the result of getClass().
            conf.setMapperClass(reverseIndexMapper.class);
            conf.setCombinerClass(reverseIndexReducer.class);
            conf.setReducerClass(reverseIndexReducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            // args[1] = input path(s), args[2] = output directory.
            FileInputFormat.setInputPaths(conf, new Path(args[1]));
            Path outputDir = new Path(args[2]);

            // Delete the output directory if it already exists, otherwise the job fails.
            outputDir.getFileSystem(conf).delete(outputDir, true);
            FileOutputFormat.setOutputPath(conf, outputDir);

            JobClient.runJob(conf);
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    }
}
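
The driver refers to reverseIndexMapper and reverseIndexReducer, which are not shown above. Below is a minimal sketch of what those classes could look like with the same classic mapred API, assuming the index is built from "word@filename" keys with summed occurrence counts (which is what the IntWritable output value and the reducer-as-combiner setup in the driver suggest); the real classes in the project may differ. Each class goes in its own file under the reverseIndexer package.

// reverseIndexMapper.java (hypothetical sketch)
package reverseIndexer;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class reverseIndexMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);

    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        // The input split tells us which file this line came from.
        String fileName = ((FileSplit) reporter.getInputSplit()).getPath().getName();
        for (String word : value.toString().toLowerCase().split("\\W+")) {
            if (!word.isEmpty()) {
                // Emit ("word@filename", 1); the reducer sums the counts per key.
                output.collect(new Text(word + "@" + fileName), ONE);
            }
        }
    }
}

// reverseIndexReducer.java (hypothetical sketch, also usable as the combiner)
package reverseIndexer;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class reverseIndexReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        // One output line per word@file pair, e.g. "hadoop@part1.txt    3"
        output.collect(key, new IntWritable(sum));
    }
}

With these in place, the job can be launched from a main method, for example reverseIndexer.startJob(new String[] {"job", "input/", "output/"}). Note that the driver reads the input path from args[1] and the output path from args[2], so args[0] is ignored here.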