List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
From source file:org.sifarish.social.RatingPredictor.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Rating predictor MR"; job.setJobName(jobName);// ww w. j a va 2 s .co m job.setJarByClass(RatingPredictor.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(RatingPredictor.PredictionMapper.class); job.setReducerClass(RatingPredictor.PredictorReducer.class); job.setMapOutputKeyClass(TextInt.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(ItemIdGroupComprator.class); job.setPartitionerClass(ItemIdPartitioner.class); Utility.setConfiguration(job.getConfiguration()); int numReducer = job.getConfiguration().getInt("rap.num.reducer", -1); numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer; job.setNumReduceTasks(numReducer); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.terrier.structures.indexing.NewCompressingMetaIndexBuilder.java
License:Mozilla Public License
public static void reverseAsMapReduceJob(Index index, String structureName, String[] keys, Configuration conf) throws Exception { long time = System.currentTimeMillis(); Job job = new Job(conf); job.setMapOutputKeyClass(KeyValueTuple.class); job.setMapOutputValueClass(IntWritable.class); job.setMapperClass(MetaIndexMapper.class); job.setReducerClass(MetaIndexReducer.class); job.setNumReduceTasks(keys.length);//from w ww . j av a 2 s . com job.setPartitionerClass(KeyedPartitioner.class); job.setInputFormatClass(CompressingMetaIndexInputFormat.class); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().set("MetaIndexInputStreamRecordReader.structureName", structureName); job.getConfiguration().setInt("CompressingMetaIndexBuilder.reverse.keyCount", keys.length); job.getConfiguration().set("CompressingMetaIndexBuilder.reverse.keys", ArrayUtils.join(keys, ",")); job.getConfiguration().set("CompressingMetaIndexBuilder.forward.valueLengths", index.getIndexProperty("index." + structureName + ".value-lengths", "")); job.getConfiguration().set("CompressingMetaIndexBuilder.forward.keys", index.getIndexProperty("index." + structureName + ".key-names", "")); FileOutputFormat.setOutputPath(job, new Path(index.getPath(), ".meta")); HadoopUtility.toHConfiguration(index, job.getConfiguration()); //job.setOutputFormatClass(NullOutputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setJarByClass(NewCompressingMetaIndexBuilder.class); try { if (!job.waitForCompletion(true)) throw new Exception("job failed"); } catch (Exception e) { throw new Exception("Problem running job to reverse metadata", e); } //only update the index from the controlling process, so that we dont have locking/concurrency issues index.setIndexProperty("index." 
+ structureName + ".reverse-key-names", ArrayUtils.join(keys, ",")); index.flush(); logger.info("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds"); }
From source file:org.visitante.basic.EngagementEventGenerator.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "web log user engaement event generator MR"; job.setJobName(jobName);//from www .j a va2 s .c om job.setJarByClass(EngagementEventGenerator.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "visitante"); job.setMapperClass(SessionExtractor.SessionMapper.class); job.setReducerClass(EngagementEventGenerator.SessionReducer.class); job.setMapOutputKeyClass(TextLong.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(SessionIdGroupComprator.class); job.setPartitionerClass(SessionIdPartitioner.class); job.setNumReduceTasks(job.getConfiguration().getInt("ee.num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.visitante.basic.SessionExtractor.java
License:Apache License
/**
 * Configures and runs the "web log session extraction MR" MapReduce job.
 *
 * @param args {@code args[0]} is added as the job input path and {@code args[1]} is set as the
 *     job output path
 * @return {@code 0} when {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("web log session extraction MR");
    job.setJarByClass(SessionExtractor.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Kept ahead of the reducer-count lookup below; presumably it loads the "visitante"
    // settings into the job configuration - confirm against Utility.
    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setReducerClass(SessionExtractor.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Custom grouping comparator and partitioner for the map output key.
    job.setGroupingComparatorClass(SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionIdPartitioner.class);

    // Reducer count comes from "num.reducer", defaulting to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:org.visitante.basic.SessionSummarizer.java
License:Apache License
/**
 * Configures and runs the "web log session summarizer MR" MapReduce job.
 *
 * @param args {@code args[0]} is added as the job input path and {@code args[1]} is set as the
 *     job output path
 * @return {@code 0} when {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("web log session summarizer MR");
    job.setJarByClass(SessionSummarizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Kept ahead of the reducer-count lookup below; presumably it loads the "visitante"
    // settings into the job configuration - confirm against Utility.
    Utility.setConfiguration(job.getConfiguration(), "visitante");

    // The mapper comes from SessionExtractor; the reducer is this class's own.
    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setReducerClass(SessionSummarizer.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Custom grouping comparator and partitioner for the map output key.
    job.setGroupingComparatorClass(SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionIdPartitioner.class);

    // Reducer count comes from "num.reducer", defaulting to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (!job.waitForCompletion(true)) {
        return 1;
    }
    return 0;
}
From source file:org.visitante.basic.UserSessionSummary.java
License:Apache License
/**
 * Configures and runs the "web log user session summary MR" MapReduce job.
 *
 * @param args {@code args[0]} is added as the job input path and {@code args[1]} is set as the
 *     job output path
 * @return {@code 0} when {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("web log user session summary MR");
    job.setJarByClass(UserSessionSummary.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Kept ahead of the reducer-count lookup below; presumably it loads the "visitante"
    // settings into the job configuration - confirm against Utility.
    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(UserSessionSummary.SessionMapper.class);
    job.setReducerClass(UserSessionSummary.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping comparator and partitioner are the ones nested in SessionExtractor.
    job.setGroupingComparatorClass(SessionExtractor.SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionExtractor.SessionIdPartitioner.class);

    // Reducer count comes from "num.reducer", defaulting to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    final int exitCode = job.waitForCompletion(true) ? 0 : 1;
    return exitCode;
}
From source file:pad.StarDriver.java
License:Apache License
/** * Execute the StarDriver Job./* ww w. j a v a 2 s.co m*/ * @param args array of external arguments, not used in this method * @return <c>1</c> if the StarDriver Job failed its execution; <c>0</c> if everything is ok. * @throws Exception */ public int run(String[] args) throws Exception { Configuration conf = new Configuration(); // GenericOptionsParser invocation in order to suppress the hadoop warning. new GenericOptionsParser(conf, args); conf.set("type", this.type.toString()); Job job = new Job(conf, this.title); job.setJarByClass(StarDriver.class); job.setMapOutputKeyClass(NodesPairWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(StarMapper.class); job.setCombinerClass(StarCombiner.class); job.setPartitionerClass(NodePartitioner.class); job.setGroupingComparatorClass(NodeGroupingComparator.class); job.setReducerClass(StarReducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.addInputPath(job, this.input); FileOutputFormat.setOutputPath(job, this.output); if (!job.waitForCompletion(verbose)) return 1; // Set up the private variable looking to the counter value this.numChanges = job.getCounters().findCounter(UtilCounters.NUM_CHANGES).getValue(); return 0; }
From source file:pad.TerminationDriver.java
License:Apache License
/** * Execute the TerminationDriver Job./* w ww .j a va 2s. c o m*/ * @param args array of external arguments, not used in this method * @return <c>1</c> if the TerminationDriver Job failed its execution; <c>0</c> if everything is ok. * @throws Exception */ public int run(String[] args) throws Exception { Configuration conf = new Configuration(); // GenericOptionsParser invocation in order to suppress the hadoop warning. new GenericOptionsParser(conf, args); Job job = new Job(conf, "TerminationDriver"); job.setJarByClass(TerminationDriver.class); job.setMapOutputKeyClass(NodesPairWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(ClusterWritable.class); job.setOutputValueClass(NullWritable.class); job.setMapperClass(TerminationMapper.class); job.setPartitionerClass(NodePartitioner.class); job.setGroupingComparatorClass(NodeGroupingComparator.class); job.setReducerClass(TerminationReducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.addInputPath(job, this.input); FileOutputFormat.setOutputPath(job, this.output); if (!job.waitForCompletion(this.verbose)) return 1; // Set up the private variables looking to the counters value this.numNodes = job.getCounters().findCounter(UtilCounters.NUM_NODES).getValue(); this.numClusters = job.getCounters().findCounter(UtilCounters.NUM_CLUSTERS).getValue(); return 0; }
From source file:PartitioningNYSE.NYSEPart.java
public int run(String[] args) throws Exception, ClassNotFoundException { Configuration conf = getConf(); Job job = new Job(conf, "IPcount"); job.setJarByClass(NYSEPart.class); //final File f = new File(NYSEPart.class.getProtectionDomain().getCodeSource().getLocation().getPath()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setMapperClass(IPcountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setCombinerClass(IPcountReducer.class); job.setReducerClass(IPcountReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setPartitionerClass(IPcountPartitioner.class); job.setNumReduceTasks(7);//w ww . j av a 2s .c o m System.exit(job.waitForCompletion(true) ? 0 : 1); return 0; }
From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java
/** * @param args the command line arguments *//*www. java2 s . c o m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Users by Country"); job.setJarByClass(Partition_Users_By_Country_Driver.class); job.setMapperClass(Partition_Users_By_Country_Mapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // partitioner class inclusion job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class); // set multiple formats for custom naming partitioning MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); // set num of reduce tasks based on partition we need (here we need 10 cos total no.of countries) job.setNumReduceTasks(11); job.setReducerClass(Partition_Users_By_Country_Reducer.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }