List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
From source file:cosmos.mapred.MediawikiIngestJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (1 != args.length) { System.err.println("Usage: input.xml,input.xml,input.xml..."); return 1; }/*from w ww. j a v a 2s.c o m*/ String inputFiles = args[0]; Configuration conf = getConf(); System.out.println("path " + conf.get("fs.default.name")); conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml")); conf.addResource(new Path("/opt/hadoop/conf/core-site.xml")); conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml")); System.out.println("path " + conf.get("fs.default.name")); //System.exit(1); Job job = new Job(conf, "Mediawiki Ingest"); job.setJarByClass(MediawikiIngestJob.class); String tablename = "sortswiki"; String zookeepers = "localhost:2181"; String instanceName = "accumulo"; String user = "root"; PasswordToken passwd = new PasswordToken("secret"); FileInputFormat.setInputPaths(job, inputFiles); job.setMapperClass(MediawikiMapper.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Mutation.class); job.setOutputFormatClass(AccumuloOutputFormat.class); BatchWriterConfig bwConfig = new BatchWriterConfig(); job.setInputFormatClass(MediawikiInputFormat.class); AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers); AccumuloOutputFormat.setConnectorInfo(job, user, passwd); AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig); AccumuloOutputFormat.setCreateTables(job, true); AccumuloOutputFormat.setDefaultTableName(job, tablename); return job.waitForCompletion(true) ? 0 : 1; }
From source file:counting.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); final int NUMBER_OF_NODES = 31; final int MAX_NUMBER_OF_TASKS = 1000; final double REDUCER_CONSTANT = 0.95; // or 1.75 if (otherArgs.length < 5) { System.err.println(/*from w w w . j a v a 2 s .co m*/ "Usage: wordcount <in> [<in>...] <out> <ngram> <combiner:yes/no> <custom partioner:yes/no>"); System.exit(2); } Job job = Job.getInstance(conf, "Word count"); // Setting map and reduce tasks //conf.setNumMapTasks(5); // Not possible with code in line? int NUMBER_OF_REDUCERS = (int) REDUCER_CONSTANT * NUMBER_OF_NODES * MAX_NUMBER_OF_TASKS; //System.out.println("Number of Reducers: " + NUMBER_OF_REDUCERS); job.setNumReduceTasks(12); // Placeholder job.setJarByClass(WordCount.class); job.setMapperClass(nGramMapper.class); nGramMapper.setN(Integer.parseInt(otherArgs[otherArgs.length - 3])); // Set ngram length System.out.println("n = " + nGramMapper.getN()); System.out.println("Combiner = " + otherArgs[otherArgs.length - 2]); System.out.println("Custom Partitioner = " + otherArgs[otherArgs.length - 1]); System.out.println("Number of reducers = " + NUMBER_OF_NODES); if (otherArgs[otherArgs.length - 2].equals("yes")) { job.setCombinerClass(IntSumReducer.class); } if (otherArgs[otherArgs.length - 1].equals("yes")) { job.setPartitionerClass(CustomPartitioner.class); //CustomPartitioner.setNumberOfReducers(NUMBER_OF_REDUCERS); } job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // Input paths for (int i = 0; i < otherArgs.length - 4; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } // Output paths FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 4])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:crunch.MaxTemperature.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from www. ja va2 s . c om Job job = new Job(getConf(), "Logging job"); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(LoggingIdentityMapper.class); job.setNumReduceTasks(0); return job.waitForCompletion(true) ? 0 : 1; }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Runs a job on which the input format, mapper, partitioner, reducer, key and
 * value classes, output format and a single reduce task are all set
 * explicitly.
 *
 * @param args command-line arguments, passed to {@code JobBuilder.parseInputAndOutput}
 * @return 0 when the job succeeds, 1 when it fails, -1 when no job was built
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job explicitJob = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (explicitJob == null) {
        return -1;
    }

    // Input.
    explicitJob.setInputFormatClass(TextInputFormat.class);

    // Map phase.
    explicitJob.setMapperClass(Mapper.class);
    explicitJob.setMapOutputKeyClass(LongWritable.class);
    explicitJob.setMapOutputValueClass(Text.class);

    // Partitioning.
    explicitJob.setPartitionerClass(HashPartitioner.class);

    // Reduce phase.
    explicitJob.setNumReduceTasks(1);
    explicitJob.setReducerClass(Reducer.class);

    // Output.
    explicitJob.setOutputKeyClass(LongWritable.class);
    explicitJob.setOutputValueClass(Text.class);
    explicitJob.setOutputFormatClass(TextOutputFormat.class);

    final boolean succeeded = explicitJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = JobBuilder.parseInputAndOutput(this, getConf(), args); if (job == null) { return -1; }//from ww w. j ava 2s. c o m job.setMapperClass(CleanerMapper.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); return job.waitForCompletion(true) ? 0 : 1; }
From source file:crunch.MaxTemperature.java
License:Apache License
public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(MaxWidgetId.class); job.setMapperClass(MaxWidgetMapper.class); job.setReducerClass(MaxWidgetReducer.class); FileInputFormat.addInputPath(job, new Path("widgets")); FileOutputFormat.setOutputPath(job, new Path("maxwidget")); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Widget.class); job.setOutputKeyClass(Widget.class); job.setOutputValueClass(NullWritable.class); job.setNumReduceTasks(1); if (!job.waitForCompletion(true)) { return 1; // error. }//from www .jav a 2s . c om return 0; }
From source file:cs6240.project.decisiontree.Pseudohigstest.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); DistributedCache.addCacheFile(new URI("s3://hr6240/higs/testing/5/higshistogram"), conf); // DistributedCache.addCacheFile(new // URI("/home/hraj17/Downloads/part-hig"),conf); Job job = new Job(conf, "word count"); job.setJarByClass(Pseudohigstest.class); job.setMapperClass(TestingMapper.class); job.setReducerClass(TestingReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setPartitionerClass(TestingPartioner.class); job.setNumReduceTasks(2); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:cs6240.project.decisiontree.Pseudotestingtwitter.java
License:Apache License
/**
 * Entry point for the twitter testing job.
 *
 * <p>After generic Hadoop options are stripped, the first remaining argument
 * is the input path and the second is the output path. A metadata file is
 * registered in the distributed cache before the job is created. The job runs
 * {@code TestingMapper} and {@code TestingReducer} with
 * {@code TestingPartioner} and two reduce tasks.
 *
 * <p>The process exits with 2 on bad usage, 0 when the job succeeds and 1
 * when it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
    // when the input or output path is missing.
    if (otherArgs.length < 2) {
        System.err.println("Usage: Pseudotestingtwitter <in> <out>");
        System.exit(2);
    }

    DistributedCache.addCacheFile(new URI("s3://hr6240/histogram/5/metadata5"), conf);

    Job job = new Job(conf, "word count");
    job.setJarByClass(Pseudotestingtwitter.class);
    job.setMapperClass(TestingMapper.class);
    job.setReducerClass(TestingReducer.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setPartitionerClass(TestingPartioner.class);
    job.setNumReduceTasks(2);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:csc555.ebratt.depaul.edu.AverageScoreDriver.java
License:Open Source License
/** * /*w w w . ja v a 2s. c o m*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the AverageScoreReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("average score by: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), AverageScoreDriver.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 2); // Mapper and Reducer Classes to use job.setMapperClass(AverageScoreMapper.class); job.setReducerClass(AverageScoreReducer.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(DoubleWritable.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(AverageScoreReducer.class); } // The Jar file to run job.setJarByClass(AverageScoreDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.AverageScoreRankerDriver.java
License:Open Source License
/** * //from w w w. j a v a2s. c om * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the AverageScoreRankerReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJobName("average score ranked"); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(1); // Mapper and Reducer Classes to use job.setMapperClass(AverageScoreRankerMapper.class); job.setReducerClass(AverageScoreRankerReducer.class); // Mapper output classes job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(AverageScoreRankerReducer.class); } // sort in descending order job.setSortComparatorClass(DoubleWritableDescendingComparator.class); // The Jar file to run job.setJarByClass(AverageScoreRankerDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }