Usage examples for org.apache.hadoop.mapreduce.Job#setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java
License:Apache License
/**
 * Builds the "prerole/appkey job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are copied into {@code conf} under the keys
 * "verbose" and "counter" before the job is created from it. The returned job is only
 * configured; this method does not submit it.
 *
 * @param conf       configuration the job is created from (mutated: "verbose", "counter")
 * @param inputpaths input paths; every element is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  provides the debug flags
 * @return the configured job
 * @throws IOException propagated from job creation or input/output path registration
 */
public Job createJobAppkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job appkeyJob = new Job(conf);
    appkeyJob.setJobName("prerole/appkey job");
    appkeyJob.setJarByClass(BasekeysMaker.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(appkeyJob, inputpath);
    }
    FileOutputFormat.setOutputPath(appkeyJob, outputpath);

    // Map side.
    appkeyJob.setMapperClass(AppkeyMapper.class);
    appkeyJob.setMapOutputKeyClass(Text.class);
    appkeyJob.setMapOutputValueClass(Text.class);

    // Shuffle and reduce side.
    appkeyJob.setPartitionerClass(AppkeyPartitioner.class);
    appkeyJob.setReducerClass(AppkeyReducer.class);
    appkeyJob.setOutputKeyClass(NullWritable.class);
    appkeyJob.setOutputValueClass(Text.class);
    appkeyJob.setNumReduceTasks(numreduce);

    return appkeyJob;
}
From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java
License:Apache License
public Job createJobComponentkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce, FingraphConfig finconfig) throws IOException { conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose()); conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter()); Job job = new Job(conf); String jobName = "prerole/componentkey job"; job.setJobName(jobName);//from w w w. j ava2s . co m job.setJarByClass(BasekeysMaker.class); for (int i = 0; i < inputpaths.length; i++) { FileInputFormat.addInputPath(job, inputpaths[i]); } FileOutputFormat.setOutputPath(job, outputpath); job.setMapperClass(ComponentkeyMapper.class); job.setReducerClass(ComponentkeyReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setPartitionerClass(ComponentkeyPartitioner.class); job.setNumReduceTasks(numreduce); return job; }
From source file:ph.fingra.hadoop.mapred.parts.prerole.ComponentNewuserMerge.java
License:Apache License
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce, FingraphConfig finconfig, Path dbpath, String dbfilename, TargetDate cutdate) throws IOException { conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose()); conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter()); conf.set("dbfilename", dbfilename); conf.set("cutyear", cutdate.getYear()); conf.set("cutmonth", cutdate.getMonth()); conf.set("cutday", cutdate.getDay()); Job job = new Job(conf); String jobName = "merge/componentnewusermerge job"; job.setJobName(jobName);//from w w w. ja v a 2 s . c om job.setJarByClass(ComponentNewuserMerge.class); for (int i = 0; i < inputpaths.length; i++) { FileInputFormat.addInputPath(job, inputpaths[i]); } if (dbpath != null) { FileInputFormat.addInputPath(job, dbpath); } FileOutputFormat.setOutputPath(job, outputpath); job.setMapperClass(ComponentNewuserMapper.class); job.setCombinerClass(ComponentNewuserCombiner.class); job.setReducerClass(ComponentNewuserReducer.class); job.setMapOutputKeyClass(ComponentNewuserKey.class); job.setMapOutputValueClass(ComponentNewuserDb.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setPartitionerClass(ComponentNewuserPartitioner.class); job.setNumReduceTasks(numreduce); return job; }
From source file:ph.fingra.hadoop.mapred.parts.prerole.LogCountStatistic.java
License:Apache License
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce, FingraphConfig finconfig) throws IOException { conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose()); conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter()); Job job = new Job(conf); String jobName = "prerole/logcount job"; job.setJobName(jobName);// www.j a v a 2 s. c o m job.setJarByClass(LogCountStatistic.class); for (int i = 0; i < inputpaths.length; i++) { FileInputFormat.addInputPath(job, inputpaths[i]); } FileOutputFormat.setOutputPath(job, outputpath); job.setMapperClass(LogCountMapper.class); job.setCombinerClass(LogCountReducer.class); job.setReducerClass(LogCountReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setPartitionerClass(LogCountPartitioner.class); job.setNumReduceTasks(numreduce); return job; }
From source file:ph.fingra.hadoop.mapred.parts.prerole.PreTransform.java
License:Apache License
/**
 * Builds the "prerole/pretransform job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are copied into {@code conf} under the keys
 * "verbose" and "counter" before the job is created from it. Besides the partitioner, a
 * custom sort comparator and grouping comparator are registered. The returned job is only
 * configured; this method does not submit it.
 *
 * @param conf       configuration the job is created from (mutated: "verbose", "counter")
 * @param inputpaths input paths; every element is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  provides the debug flags
 * @return the configured job
 * @throws IOException propagated from job creation or input/output path registration
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job transformJob = new Job(conf);
    transformJob.setJobName("prerole/pretransform job");
    transformJob.setJarByClass(PreTransform.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(transformJob, inputpath);
    }
    FileOutputFormat.setOutputPath(transformJob, outputpath);

    // Map side.
    transformJob.setMapperClass(PreTransformMapper.class);
    transformJob.setMapOutputKeyClass(TransformKey.class);
    transformJob.setMapOutputValueClass(TransformContainer.class);

    // Shuffle: partitioning, ordering and grouping of map output keys.
    transformJob.setPartitionerClass(PreTransformPartitioner.class);
    transformJob.setSortComparatorClass(PreTransformSortComparator.class);
    transformJob.setGroupingComparatorClass(PreTransformGroupComparator.class);

    // Reduce side.
    transformJob.setReducerClass(PreTransformReducer.class);
    transformJob.setOutputKeyClass(NullWritable.class);
    transformJob.setOutputValueClass(Text.class);
    transformJob.setNumReduceTasks(numreduce);

    return transformJob;
}
From source file:planattributescleaning.PlanAttributesCleaning.java
/**
 * Configures and runs the "attributes cleaning" job with four reduce tasks, then exits
 * the JVM with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from {@code waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: PlanAttributesCleaning <input path> <output path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf, "attributes cleaning");
    job.setJarByClass(PlanAttributesCleaning.class);

    job.setMapperClass(Events_Mapper1.class);
    job.setReducerClass(Events_Reducer1.class);
    job.setPartitionerClass(GroupPartitioner.class);
    job.setNumReduceTasks(4);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:proj.analysis.uscensus.USCensusFirstJob.java
/** * @param args the command line arguments *//*from w w w . j a v a 2 s .com*/ public static void main(String[] args) { try { Configuration conf = new Configuration(); // Give the MapRed job a name. You'll see this name in the Yarn // webapp. Job job1 = Job.getInstance(conf, "US-Census-1"); // Current class. job1.setJarByClass(USCensusFirstJob.class); // Mapper job1.setMapperClass(USCensusFirstMapper.class); // Combiner. We use the reducer as the combiner in this case. job1.setCombinerClass(USCensusFirstCombiner.class); // Reducer job1.setReducerClass(USCensusFirstReducer.class); job1.setPartitionerClass(USCensusFirstPartitioner.class); job1.setNumReduceTasks(USCensusFirstPartitioner.statesName.size()); // Outputs from the Mapper. job1.setMapOutputKeyClass(Text.class); job1.setMapOutputValueClass(MapperOPValue.class); // Outputs from Reducer. It is sufficient to set only the following // two properties // if the Mapper and Reducer has same key and value types. It is set // separately for // elaboration. job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(Text.class); // path to input in HDFS FileInputFormat.addInputPath(job1, new Path(args[0])); FileSystem fileSystem = FileSystem.get(conf); String outputPath = args[1] + "_temp"; if (fileSystem.exists(new Path(outputPath))) { fileSystem.delete(new Path(outputPath), true); } // path to output in HDFS FileOutputFormat.setOutputPath(job1, new Path(outputPath)); // Block until the job is completed. boolean isCompleted = job1.waitForCompletion(true); //Second job // Configuration conf2 = new Configuration(); if (isCompleted) { Job job2 = Job.getInstance(conf, "US-Census-2"); job2.setJarByClass(USCensusFirstJob.class); // Mapper job2.setMapperClass(USCensusSecondMapper.class); // Reducer job2.setReducerClass(USCensusSecondReducer.class); job2.setNumReduceTasks(1); // Outputs from the Mapper. job2.setMapOutputKeyClass(Text.class); job2.setMapOutputValueClass(Text.class); // Outputs from Reducer. 
It is sufficient to set only the following // two properties // if the Mapper and Reducer has same key and value types. It is set // separately for // elaboration. job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); // path to input in HDFS FileInputFormat.addInputPath(job2, new Path(outputPath)); // FileSystem fileSystem = FileSystem.get(conf1); String outputPath2 = args[1]; if (fileSystem.exists(new Path(outputPath2))) { fileSystem.delete(new Path(outputPath2), true); } // path to output in HDFS FileOutputFormat.setOutputPath(job2, new Path(outputPath2)); System.exit(job2.waitForCompletion(true) ? 0 : 1); } } catch (IOException e) { System.err.println(e.getMessage()); } catch (InterruptedException e) { System.err.println(e.getMessage()); } catch (ClassNotFoundException e) { System.err.println(e.getMessage()); } }
From source file:sa.edu.kaust.twitter.index.IndexTweets.java
License:Apache License
public static void run(String input, String output, int reduceTasks, String hashtag, String url, String startID, String endID, boolean expandHashtag, boolean expandURL) throws Exception { Path inputPath = new Path(input); Path outputPath = new Path(output); sLogger.info("input dir: " + inputPath); sLogger.info("output dir: " + outputPath); sLogger.info("num of output files: " + reduceTasks); Configuration conf = new Configuration(); conf.set("startID", startID); conf.set("endID", endID); conf.setBoolean("expandHashtag", expandHashtag); conf.setBoolean("expandURL", expandURL); FileSystem fs = FileSystem.get(conf); Job job = new Job(conf, "IPIndexTweets"); job.setJarByClass(IndexTweets.class); job.setNumReduceTasks(reduceTasks);//from w w w.j av a 2 s.c om if (job.getConfiguration().get("mapred.job.tracker").equals("local")) { job.getConfiguration().set("HashtagRepresentation", hashtag); job.getConfiguration().set("UrlRepresentation", url); } else { DistributedCache.addCacheFile(new URI(hashtag), job.getConfiguration()); DistributedCache.addCacheFile(new URI(url), job.getConfiguration()); } FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); //conf.set("mapred.child.java.opts", "-Xmx2048m"); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(PairOfStringLong.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(TweetPostingsList.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // delete the output directory if it exists already //FileSystem.get(conf).delete(new Path(output), true); if (fs.exists(outputPath)) { sLogger.info("Output already exists: skipping!"); return; } long startTime = System.currentTimeMillis(); job.waitForCompletion(true); sLogger.info("Job Finished in " + 
(System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); }
From source file:sampler.TotalOrderPrep.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException { Job sample_job = new Job(); // Remember the real input format so the sampling input format can use // it under the hood sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true); sample_job.setInputFormatClass(TextInputFormat.class); //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9"); // Base the sample size on the number of reduce tasks that will be used // by the real job, but only use 1 reducer for this job (maps output very // little)//from w w w.j a va2s . com sample_job.setNumReduceTasks(1); // Make this job's output a temporary filethe input file for the real job's // TotalOrderPartitioner Path partition = new Path("partitions/"); //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition); conf = new Configuration(); FileSystem fs; try { fs = FileSystem.get(conf); if (fs.exists(partition)) { fs.delete(partition, true); } } catch (IOException e) { e.printStackTrace(); } FileOutputFormat.setOutputPath(sample_job, partition); FileInputFormat.setInputPaths(sample_job, new Path(args[0])); //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000")); //job.setPartitionerClass(TotalOrderPartitioner.class); // If there's a combiner, turn it into an identity reducer to prevent // destruction of keys. 
sample_job.setCombinerClass(Combiner.class); sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class); sample_job.setMapOutputValueClass(ImmutableBytesWritable.class); sample_job.setOutputKeyClass(ImmutableBytesWritable.class); sample_job.setOutputValueClass(NullWritable.class); sample_job.setPartitionerClass(HashPartitioner.class); sample_job.setOutputFormatClass(SequenceFileOutputFormat.class); sample_job.setJarByClass(TotalOrderPrep.class); sample_job.setMapperClass(Map.class); sample_job.setReducerClass(PartitioningReducer.class); sample_job.setJobName("(Sampler)"); sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); return sample_job; // Run the job. If it fails, then it's probably because of the main job. /*try { sample_job.waitForCompletion(false); if( !sample_job.isSuccessful() ) throw new RuntimeException("Partition sampler job failed."); } catch (Exception e) { throw new RuntimeException("Failed to start Partition sampler.", e); }*/ }
From source file:SecondarySort.HashToAlternateWithSS.java
/**
 * Creates the "iteration" job: reducer, partitioner, sort comparator and grouping
 * comparator are registered, and the output is typed as ({@code LongPair}, {@code Text}).
 * No mapper, input or output path is set here.
 *
 * @return the configured job
 * @throws IOException propagated from job creation
 */
protected Job jobConfig() throws IOException {
    Job iterationJob = new Job(new JobConf(), "iteration");
    iterationJob.setJarByClass(HashToAlternateWithSS.class);

    // Shuffle behaviour for the LongPair keys.
    iterationJob.setPartitionerClass(LongPair.HPartitioner.class);
    iterationJob.setSortComparatorClass(LongPair.Comparator.class);
    iterationJob.setGroupingComparatorClass(LongPair.GroupComparator.class);

    // Reduce side.
    iterationJob.setReducerClass(ReduceSS.class);
    iterationJob.setOutputKeyClass(LongPair.class);
    iterationJob.setOutputValueClass(Text.class);

    return iterationJob;
}