List of usage examples for org.apache.hadoop.mapreduce.Job#setSortComparatorClass
public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
From source file:top25products.Top25ProductChaining.java
/** * @param args the command line arguments *///from w w w . j a v a 2 s. c o m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { //job 1 Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Job: 1, top 25 Products based on ratings"); job.setJarByClass(top25products.Top25ProductChaining.class); job.setMapperClass(FirstMapper.class); job.setReducerClass(FirstReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path firstJobOutput = new Path(args[1]); FileOutputFormat.setOutputPath(job, firstJobOutput); job.waitForCompletion(true); //job 2 Job job2 = Job.getInstance(conf, "Job: 2, top 25 Products based on ratings"); job2.setJarByClass(top25products.Top25ProductChaining.class); job2.setMapperClass(SecondMapper.class); job2.setReducerClass(SecondReducer.class); job2.setSortComparatorClass(SortKeyComparator.class); job2.setMapOutputKeyClass(DoubleWritable.class); job2.setMapOutputValueClass(Text.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(job2, firstJobOutput); String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.US) .format(new Timestamp(System.currentTimeMillis())); FileOutputFormat.setOutputPath(job2, new Path(args[1] + timeStamp)); System.exit(job2.waitForCompletion(true) ? 0 : 1); }
From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Source2TargetJob.java
License:Apache License
/**
 * Builds the job configured with {@code Source2TargetComparator},
 * {@code Source2TargetPartitioner}, {@code KeepProvenanceCountsOnlyMapper}
 * and {@code MarginalReducer}, reading and writing sequence files.
 *
 * <p>Side effect: the child JVM options and the
 * {@code MarginalReducer.SOURCE_TO_TARGET} flag are written into the
 * {@code conf} instance passed in by the caller.
 *
 * @param conf the base configuration; modified in place as described above
 * @return the configured, not yet submitted, job
 * @throws IOException if the job cannot be created
 */
public static Job getJob(Configuration conf) throws IOException {
    // These must be set before the Job is created: Job takes its own copy
    // of the configuration at construction time.
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx5128m");
    conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, true);

    // Factory method instead of the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(Source2TargetJob.class);
    // NOTE(review): the name is spelled "Source2Taget"; kept unchanged in
    // case external tooling matches on the job name.
    job.setJobName("Source2Taget");

    job.setSortComparatorClass(Source2TargetComparator.class);
    job.setPartitionerClass(Source2TargetPartitioner.class);
    job.setMapperClass(KeepProvenanceCountsOnlyMapper.class);
    job.setReducerClass(MarginalReducer.class);

    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(ProvenanceCountMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(FeatureMap.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    return job;
}
From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Target2SourceJob.java
License:Apache License
public static Job getJob(Configuration conf) throws IOException { conf.set("mapred.map.child.java.opts", "-Xmx200m"); conf.set("mapred.reduce.child.java.opts", "-Xmx5128m"); conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, false); Job job = new Job(conf); job.setJarByClass(Target2SourceJob.class); job.setJobName("Target2Source"); job.setSortComparatorClass(Target2SourceComparator.class); job.setPartitionerClass(Target2SourcePartitioner.class); job.setMapperClass(SwappingMapper.class); job.setReducerClass(MarginalReducer.class); job.setMapOutputKeyClass(RuleWritable.class); job.setMapOutputValueClass(ProvenanceCountMap.class); job.setOutputKeyClass(RuleWritable.class); job.setOutputValueClass(FeatureMap.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); return job;//from w ww .j a v a 2 s.c o m }
From source file:uk.ac.cam.eng.extraction.hadoop.merge.MergeJob.java
License:Apache License
public static Job getJob(Configuration conf) throws IOException { conf.set("mapred.map.child.java.opts", "-Xmx200m"); conf.set("mapred.reduce.child.java.opts", "-Xmx10240m"); Job job = new Job(conf); job.setJarByClass(MergeJob.class); job.setJobName("Merge"); job.setSortComparatorClass(MergeComparator.class); job.setPartitionerClass(MergePartitioner.class); job.setReducerClass(MergeReducer.class); job.setCombinerClass(MergeCombiner.class); job.setMapOutputKeyClass(RuleWritable.class); job.setMapOutputValueClass(AlignmentAndFeatureMap.class); job.setOutputKeyClass(RuleWritable.class); job.setOutputValueClass(AlignmentAndFeatureMap.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SimpleHFileOutputFormat.class); return job;// w w w . j av a 2s . c om }