Usage examples for org.apache.hadoop.mapreduce.Job.setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.mycompany.searcher.Searcher.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); System.out.print("Please input a keyword:\t"); conf.set(KEYWORD, in.readLine());//from w w w . java 2 s .co m conf.set(MINIMUM, args[2]); Job job = Job.getInstance(conf, "keyword search"); job.setJarByClass(Searcher.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); clearOutput(conf, new Path(args[1])); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); TimeUnit.SECONDS.sleep(1); in = new BufferedReader( new InputStreamReader(FileSystem.get(conf).open(new Path(args[1] + "/part-r-00000")), "UTF-8")); String line; HashMap<String, Integer> map = new HashMap(); while ((line = in.readLine()) != null) { StringTokenizer tok = new StringTokenizer(line); map.put(tok.nextToken(), new Integer(tok.nextToken())); } List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet()); Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() { public int compare(Map.Entry<String, Integer> entry1, Map.Entry<String, Integer> entry2) { return (entry2.getValue() - entry1.getValue()); } }); for (Map.Entry<String, Integer> entry : list) { in = new BufferedReader( new InputStreamReader(FileSystem.get(conf).open(new Path(entry.getKey())), "UTF-8")); System.out.println("\n" + in.readLine()); System.out.println("\n" + in.readLine() + ":" + entry.getValue() + "\n"); } }
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
/**
 * Creates the "Blur Indexer" job after storing this task, Base64-encoded, in the given
 * configuration under {@code BLUR_BLURTASK}.
 *
 * @param configuration configuration that receives the encoded task and backs the new job
 * @return a job with reducer, output key/value classes and reducer count already set
 * @throws IOException if writing the task bytes fails
 */
public Job configureJob(Configuration configuration) throws IOException {
    if (getIndexingType() == INDEXING_TYPE.UPDATE) {
        checkTable();
    }

    // Let write(...) emit its bytes into an in-memory buffer.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream sink = new DataOutputStream(buffer);
    write(sink);
    sink.close();

    // The encoded task must be in the configuration before the Job is constructed from it.
    byte[] encodedTask = Base64.encodeBase64(buffer.toByteArray());
    configuration.set(BLUR_BLURTASK, new String(encodedTask));

    Job indexerJob = new Job(configuration, "Blur Indexer");
    indexerJob.setReducerClass(BlurReducer.class);
    indexerJob.setOutputKeyClass(BytesWritable.class);
    indexerJob.setOutputValueClass(BlurMutate.class);
    indexerJob.setNumReduceTasks(getNumReducers(configuration));
    return indexerJob;
}
From source file:com.netflix.Aegisthus.java
License:Apache License
/**
 * Configures and runs the job described by the command-line options.
 *
 * <p>Input paths come from every {@code OPT_INPUT} value plus, when {@code OPT_INPUTDIR} is
 * given, whatever {@code getDataFiles} returns for that directory. The job id and tracking URL
 * are printed right after submission.
 *
 * @param args raw command-line arguments, parsed by {@code getOptions}
 * @return {@code 0} if the job succeeds; {@code 1} if it fails or {@code getOptions} returns
 *         {@code null}
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(Aegisthus.class);

    CommandLine options = getOptions(args);
    if (options == null) {
        return 1;
    }

    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);

    // Collect explicit input files first, then the files found under the input directory.
    List<Path> inputPaths = Lists.newArrayList();
    if (options.hasOption(OPT_INPUT)) {
        String[] inputs = options.getOptionValues(OPT_INPUT);
        for (int i = 0; i < inputs.length; i++) {
            inputPaths.add(new Path(inputs[i]));
        }
    }
    if (options.hasOption(OPT_INPUTDIR)) {
        String inputDir = options.getOptionValue(OPT_INPUTDIR);
        inputPaths.addAll(getDataFiles(job.getConfiguration(), inputDir));
    }
    Path[] inputArray = inputPaths.toArray(new Path[inputPaths.size()]);
    TextInputFormat.setInputPaths(job, inputArray);

    Path outputPath = new Path(options.getOptionValue(OPT_OUTPUT));
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit explicitly so the id and tracking URL can be reported before blocking.
    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java
License:Apache License
/**
 * Entry point for the "AvgTimeToAnsPerHashTag" job.
 *
 * <p>Expects exactly two arguments after generic Hadoop options are stripped: the input path
 * and the output path. Output fields are separated by a comma. The process exits with
 * {@code 2} on a usage error, {@code 0} on job success and {@code 1} on job failure.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");

    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Path inputPath = new Path(remaining[0]);
    Path outputPath = new Path(remaining[1]);

    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    // NOTE(review): the reducer doubles as the combiner; confirm its aggregation is safe to
    // apply to partial groups.
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:com.neu.cs6240.TopKExperts.JoinQA.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: JoinQA <in> <out>"); System.exit(2);// w w w . ja v a2 s . c om } Job job = new Job(conf, "JoinQA"); job.setJarByClass(JoinQA.class); job.setMapperClass(JoinQAMapper.class); job.setReducerClass(JoinQAReducer.class); job.setOutputKeyClass(JoinQAKey.class); job.setOutputValueClass(JoinQAValue.class); job.setPartitionerClass(JoinQAPartitioner.class); job.setGroupingComparatorClass(JoinQAGroupComparator.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean isSucess = false; isSucess = job.waitForCompletion(true); if (isSucess) { // On successful completion of JoinQA start UserAnswerCountPerHashTag System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag..."); String[] argsForMR2 = new String[2]; argsForMR2[0] = otherArgs[1]; argsForMR2[1] = otherArgs[1] + "MR2"; isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2); if (isSucess) { // On successful completion of UserAnswerCountPerHashTag start TopKPerHashTag System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag..."); String[] argsForMR3 = new String[2]; argsForMR3[0] = argsForMR2[1]; argsForMR3[1] = argsForMR2[1] + "MR3"; isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3); if (isSucess) { // Successfully complete TopKPerHashTag MR System.out.println("All MR - Successful."); } else { // Failed UserAnswerCountPerHashTag MR System.out.println("MR - TopKPerHashTag failed."); } } else { // On unsuccessful completion of JoinQA end MR System.out.println("MR - UserAnswerCountPerHashTag failed."); } } else { // On unsuccessful completion of JoinQA end MR System.out.println("MR - JoinQA failed."); } System.exit(isSucess ? 0 : 1); }
From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java
License:Apache License
/**
 * Configures and runs the "TopKPerHashTag" job, blocking until it finishes.
 *
 * <p>Exits the JVM with status {@code 2} unless exactly two arguments (input path, output
 * path) remain after generic Hadoop options are stripped.
 *
 * @param args command-line style arguments: {@code <in> <out>}
 * @return {@code true} if the job completed successfully
 * @throws Exception if the job cannot be configured or run
 */
public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Path inputPath = new Path(remaining[0]);
    Path outputPath = new Path(remaining[1]);

    Job topKJob = new Job(conf, "TopKPerHashTag");
    topKJob.setJarByClass(TopKPerHashTag.class);

    // Map/reduce implementation classes.
    topKJob.setMapperClass(TopKPerHashTagMapper.class);
    topKJob.setReducerClass(TopKPerHashTagReducer.class);

    // Output types.
    topKJob.setOutputKeyClass(TopKPerHashTagKey.class);
    topKJob.setOutputValueClass(Text.class);

    // Custom partitioning and reduce-side grouping.
    topKJob.setPartitionerClass(TopKPerHashTagPartitioner.class);
    topKJob.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);

    FileInputFormat.addInputPath(topKJob, inputPath);
    FileOutputFormat.setOutputPath(topKJob, outputPath);

    boolean succeeded = topKJob.waitForCompletion(true);
    return succeeded;
}
From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java
License:Apache License
public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>"); System.exit(2);//from w ww . j a v a 2 s .c o m } Job job = new Job(conf, "UserAnswerCountPerHashTag"); job.setJarByClass(UserAnswerCountPerHashTag.class); job.setMapperClass(UserAnswerCountPerHashTagMapper.class); job.setReducerClass(UserAnswerCountPerHashTagReducer.class); job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class); job.setOutputValueClass(IntWritable.class); job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class); job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); return job.waitForCompletion(true); }
From source file:com.neu.cs6240.Xml2csvComments.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // Setting up the xml tag configurator for splitter conf.set("xmlinput.start", "<row "); conf.set("xmlinput.end", " />"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Xml2csvPosts <in> <out>"); System.exit(2);//from w ww .j a v a 2 s.c om } Job job = new Job(conf, "Converts Posts.xml to .csv"); job.setJarByClass(Xml2csvPosts.class); job.setInputFormatClass(XmlInputFormat.class); job.setMapperClass(CommentsMapper.class); job.setReducerClass(CommentsReducer.class); job.setPartitionerClass(PostsPartitioner.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set as per your file size job.setNumReduceTasks(10); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.neu.cs6240.Xml2csvPosts.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // Setting up the xml tag configurator for splitter conf.set("xmlinput.start", "<row "); conf.set("xmlinput.end", " />"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Xml2csvPosts <in> <out>"); System.exit(2);/*from w ww . j av a 2 s . c om*/ } Job job = new Job(conf, "Converts Posts.xml to .csv"); job.setJarByClass(Xml2csvPosts.class); job.setInputFormatClass(XmlInputFormat.class); job.setMapperClass(PostsMapper.class); job.setReducerClass(PostsReducer.class); job.setPartitionerClass(PostsPartitioner.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set as per your file size job.setNumReduceTasks(15); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java
License:Apache License
static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls) throws IOException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(HFileOutputFormat2.class); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else {//from www . j a v a2s .c o m LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); // Use table's region boundaries for TOP split points. LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName())); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(table, conf); configureBloomType(table, conf); configureBlockSize(table, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured."); }