List of usage examples for org.apache.hadoop.mapreduce.Job.setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
public static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls) throws IOException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(HFileOutputFormatBase.class); // Based on the configured map output class, set the correct reducer to // properly//from w w w.j av a 2s. com // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); // Use table's region boundaries for TOP split points. LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName())); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(table, conf); configureBloomType(table, conf); configureBlockSize(table, conf); // TableMapReduceUtil.addDependencyJars(job);// TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured."); }
From source file:com.neusoft.hbase.test.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// ww w .ja v a 2 s . c o m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.nistfortunetellers.cleaning.NISTClean.java
License:Apache License
/** Runs a Job that is Text in and Out, and TextInput in and out, too! */ @SuppressWarnings({ "deprecation", "rawtypes" }) static void runTextJob(String jobName, Configuration jobConfig, String inputPath, String outputPath, Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) { try {// w w w .ja v a 2s. co m Job genericJob = new Job(jobConfig, jobName); // DEBUG //genericJob.setNumReduceTasks(0); // END DEBUG genericJob.setJarByClass(NISTClean.class); genericJob.setOutputKeyClass(Text.class); genericJob.setOutputValueClass(Text.class); genericJob.setMapperClass(mapper); genericJob.setReducerClass(reducer); genericJob.setInputFormatClass(TextInputFormat.class); genericJob.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(genericJob, new Path(inputPath)); FileOutputFormat.setOutputPath(genericJob, new Path(outputPath)); genericJob.waitForCompletion(true); } catch (IOException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.niuwa.hadoop.chubao.job.IndicatorJob002.java
License:Apache License
@Override public void setJobSpecialInfo(Job job, Configuration conf, RunParams params, Map<String, Path> tempPaths) throws Exception { job.setMapperClass(IndicatorJob002.UserIdMapper.class); job.setCombinerClass(IndicatorJob002.CombinerSumReducer.class); job.setReducerClass(IndicatorJob002.IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // //from w ww . ja v a2s. c om FileInputFormat.addInputPath(job, ChubaoJobConfig.getInputPath(ChubaoJobConfig.INPUT_CONTACT)); // FileOutputFormat.setOutputPath(job, tempPaths.get(IndicatorJob002.class.getName())); // HadoopUtil.deleteOutputFile(tempPaths.get(IndicatorJob002.class.getName())); }
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
/**
 * Runs two chained MapReduce jobs through a {@code JobControl}: the second job reads the
 * output of the first and is registered as depending on it.
 *
 * <p>The base path defaults to {@code hdfs://ns1:9000/user/root} and can be overridden with the
 * first command-line argument. The method blocks until the job control reports that all jobs
 * have finished, prints the list of successful jobs to standard output, prints any failed jobs
 * to standard error, and then stops the job control.
 *
 * @param args optional; {@code args[0]} overrides the base path
 * @throws Exception propagated from job setup, or if the polling thread is interrupted
 */
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();

    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }

    // Format the date once so both temp paths are guaranteed to share the same stamp.
    String dateStamp = DateUtil.format(new Date());
    String[] args_1 = new String[] {
            path + "/chubao/input/contact",
            path + "/chubao/temp/" + dateStamp + "/contact_total",
            path + "/chubao/temp/" + dateStamp + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // First job: reads otherArgs[0], writes otherArgs[1].
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: reads the first job's output (otherArgs[1]), writes otherArgs[2].
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // Wrap both jobs so they can be scheduled by the job control.
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // The second job must not start before the first one has finished.
    controlledJob2.addDependingJob(controlledJob1);

    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // The job control runs on its own thread; this thread only monitors it.
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();

    // Sleep between polls instead of spinning, which would pin a CPU core for the whole run.
    final long pollIntervalMillis = 500L;
    while (!jobControl.allFinished()) {
        Thread.sleep(pollIntervalMillis);
    }

    System.out.println(jobControl.getSuccessfulJobList());
    if (!jobControl.getFailedJobList().isEmpty()) {
        System.err.println("Failed jobs: " + jobControl.getFailedJobList());
    }
    jobControl.stop();
}
From source file:com.niuwa.hadoop.jobs.sample.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { HadoopUtil.isWinOrLiux();//from w w w .j av a 2s. co m Configuration conf = new Configuration(); args = new String[] { "hdfs://192.168.101.219:9000/user/root/input", "hdfs://192.168.101.219:9000/user/root/output/count" + new Date().getTime() }; String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.obomprogramador.bigdata.sentiment.sentimentanalysis.Sentiment.java
License:Apache License
/**
 * Command-line entry point for the sentiment job.
 *
 * <p>Requires at least three arguments after generic Hadoop options are stripped: the
 * sentiwordnet file, the input path and the output path. The first is stored in the
 * configuration under {@code sentwordnetfile} before the job is created. Prints a usage
 * message and exits with status 2 when too few arguments are given; otherwise exits with 0 on
 * job success and 1 on failure.
 *
 * @param args raw command-line arguments, possibly including generic Hadoop options
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length < 3) {
        System.err.println("Usage: Sentiment <sentiwordnet file> <in> <out>");
        System.exit(2);
    }

    System.out.println("Param1: " + remaining[0] + " Param2: " + remaining[1] + " Param3: " + remaining[2]);

    // Must be set before Job.getInstance(conf) so the job sees the value.
    conf.set("sentwordnetfile", remaining[0]);

    Job sentimentJob = Job.getInstance(conf);
    sentimentJob.setJarByClass(Sentiment.class);

    // The same reducer class doubles as the combiner.
    sentimentJob.setMapperClass(TokenizerMapper.class);
    sentimentJob.setCombinerClass(IntSumReducer.class);
    sentimentJob.setReducerClass(IntSumReducer.class);

    sentimentJob.setOutputKeyClass(Text.class);
    sentimentJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(sentimentJob, new Path(remaining[1]));
    FileOutputFormat.setOutputPath(sentimentJob, new Path(remaining[2]));

    boolean succeeded = sentimentJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:com.pagerankcalculator.TwitterPageRank.java
/** * Graph Parsing/*w w w. java2 s . co m*/ * Memasukan data mentah dan melakukan inisialisasi pagerank * * @param in file data masukan * @param out direktori output */ public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(getConf()); job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph"); job.setJarByClass(TwitterPageRank.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(GraphParsingMapper.class); job.setReducerClass(GraphParsingReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); Path inputFilePath = new Path(in); Path outputFilePath = new Path(out); FileInputFormat.addInputPath(job, inputFilePath); FileOutputFormat.setOutputPath(job, outputFilePath); FileSystem fs = FileSystem.newInstance(getConf()); if (fs.exists(outputFilePath)) { fs.delete(outputFilePath, true); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.pagerankcalculator.TwitterPageRank.java
/**
 * PageRank calculation (job #2), one iteration per call.
 *
 * <p>Any existing output directory is deleted recursively before the job is run.
 *
 * @param in        input path for this iteration
 * @param out       output directory for this iteration
 * @param iteration iteration number; only used in the job name
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException            on job setup or file system errors
 * @throws InterruptedException   if interrupted while waiting for the job
 * @throws ClassNotFoundException declared by {@code Job.waitForCompletion}
 */
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // newInstance() hands back a dedicated FileSystem object, so it has to be closed here;
    // this method runs once per iteration, so a leak would accumulate.
    FileSystem fs = FileSystem.newInstance(getConf());
    try {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    } finally {
        fs.close();
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
/**
 * PageRank sorting (job #3).
 *
 * <p>The map output is keyed by {@code DoubleWritable} and ordered with
 * {@code DoubleSortDescComparator}; a single reduce task is used. Any existing output
 * directory is deleted recursively before the job is run.
 *
 * @param in  input path
 * @param out output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException            on job setup or file system errors
 * @throws InterruptedException   if interrupted while waiting for the job
 * @throws ClassNotFoundException declared by {@code Job.waitForCompletion}
 */
public int sortPagerank(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    // Map output types differ from the final output types, so both are set explicitly.
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // newInstance() hands back a dedicated FileSystem object, so it has to be closed here;
    // otherwise every call leaks one handle.
    FileSystem fs = FileSystem.newInstance(getConf());
    try {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    } finally {
        fs.close();
    }

    return job.waitForCompletion(true) ? 0 : 1;
}