Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job.
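Below is a minimal, self-contained sketch (the class name and the use of args[0]/args[1] as input/output paths are illustrative assumptions) showing the two common uses of this call: requesting a fixed number of reduce tasks, or passing 0 to run a map-only job. The call must be made before the job is submitted; otherwise it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setNumReduceTasks example");
        job.setJarByClass(SetNumReduceTasksExample.class);

        // Identity mapper and reducer keep the example self-contained.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Request four reduce tasks; must be set before submission,
        // otherwise setNumReduceTasks throws IllegalStateException.
        job.setNumReduceTasks(4);
        // job.setNumReduceTasks(0); // alternatively, a map-only job with no shuffle or reduce phase

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}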

Usage

From source file:cosmos.mapred.MediawikiIngestJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }

    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));

    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));

    System.out.println("path " + conf.get("fs.default.name"));
    //System.exit(1);
    Job job = new Job(conf, "Mediawiki Ingest");

    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);

    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:counting.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    final int NUMBER_OF_NODES = 31;
    final int MAX_NUMBER_OF_TASKS = 1000;
    final double REDUCER_CONSTANT = 0.95; // or 1.75

    if (otherArgs.length < 5) {
        System.err.println(
                "Usage: wordcount <in> [<in>...] <out> <ngram> <combiner:yes/no> <custom partitioner:yes/no>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "Word count");

    // Setting map and reduce tasks
    //conf.setNumMapTasks(5); // Not available in the new API; the number of map tasks is determined by the input splits
    int NUMBER_OF_REDUCERS = (int) (REDUCER_CONSTANT * NUMBER_OF_NODES * MAX_NUMBER_OF_TASKS); // cast the whole product, not just the constant
    //System.out.println("Number of Reducers: " + NUMBER_OF_REDUCERS);
    job.setNumReduceTasks(12); // Placeholder

    job.setJarByClass(WordCount.class);
    job.setMapperClass(nGramMapper.class);
    nGramMapper.setN(Integer.parseInt(otherArgs[otherArgs.length - 3])); // Set ngram length
    System.out.println("n = " + nGramMapper.getN());
    System.out.println("Combiner = " + otherArgs[otherArgs.length - 2]);
    System.out.println("Custom Partitioner = " + otherArgs[otherArgs.length - 1]);
    System.out.println("Number of reducers = " + NUMBER_OF_NODES);
    if (otherArgs[otherArgs.length - 2].equals("yes")) {
        job.setCombinerClass(IntSumReducer.class);
    }

    if (otherArgs[otherArgs.length - 1].equals("yes")) {
        job.setPartitionerClass(CustomPartitioner.class);
        //CustomPartitioner.setNumberOfReducers(NUMBER_OF_REDUCERS);
    }
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Input paths
    for (int i = 0; i < otherArgs.length - 4; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // Output paths
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 4]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
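The REDUCER_CONSTANT in the example above follows Hadoop's rule of thumb: 0.95 (or 1.75) multiplied by the number of nodes times the maximum number of reduce slots per node. A minimal sketch of that calculation follows; the node and slot counts are illustrative assumptions, not values taken from the example.

import org.apache.hadoop.mapreduce.Job;

public class ReducerCountHelper {

    // Hadoop's guideline: with 0.95 all reducers can launch in a single wave as soon
    // as the maps finish; with 1.75 faster nodes run a second wave, improving load balancing.
    static int suggestedReducers(int nodes, int reduceSlotsPerNode, double constant) {
        return (int) (constant * nodes * reduceSlotsPerNode);
    }

    static void applyTo(Job job) {
        int nodes = 31;             // assumed cluster size
        int reduceSlotsPerNode = 2; // assumed per-node reduce capacity
        job.setNumReduceTasks(suggestedReducers(nodes, reduceSlotsPerNode, 0.95)); // 58 reducers
    }
}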

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }

        Job job = new Job(getConf(), "Logging job");
        job.setJarByClass(getClass());

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(LoggingIdentityMapper.class);
        job.setNumReduceTasks(0);

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
        if (job == null) {
            return -1;
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(Mapper.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setPartitionerClass(HashPartitioner.class);

        job.setNumReduceTasks(1);
        job.setReducerClass(Reducer.class);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setOutputFormatClass(TextOutputFormat.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
        if (job == null) {
            return -1;
        }

        job.setMapperClass(CleanerMapper.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

        return job.waitForCompletion(true) ? 0 : 1;
    }

From source file:crunch.MaxTemperature.java

License:Apache License

public int run(String[] args) throws Exception {
        Job job = new Job(getConf());

        job.setJarByClass(MaxWidgetId.class);

        job.setMapperClass(MaxWidgetMapper.class);
        job.setReducerClass(MaxWidgetReducer.class);

        FileInputFormat.addInputPath(job, new Path("widgets"));
        FileOutputFormat.setOutputPath(job, new Path("maxwidget"));

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Widget.class);

        job.setOutputKeyClass(Widget.class);
        job.setOutputValueClass(NullWritable.class);

        job.setNumReduceTasks(1);

        if (!job.waitForCompletion(true)) {
            return 1; // error.
        }

        return 0;
    }

From source file:cs6240.project.decisiontree.Pseudohigstest.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    DistributedCache.addCacheFile(new URI("s3://hr6240/higs/testing/5/higshistogram"), conf);
    // DistributedCache.addCacheFile(new
    // URI("/home/hraj17/Downloads/part-hig"),conf);
    Job job = new Job(conf, "word count");
    job.setJarByClass(Pseudohigstest.class);
    job.setMapperClass(TestingMapper.class);
    job.setReducerClass(TestingReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setPartitionerClass(TestingPartioner.class);
    job.setNumReduceTasks(2);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cs6240.project.decisiontree.Pseudotestingtwitter.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    DistributedCache.addCacheFile(new URI("s3://hr6240/histogram/5/metadata5"), conf);
    Job job = new Job(conf, "word count");
    job.setJarByClass(Pseudotestingtwitter.class);
    job.setMapperClass(TestingMapper.class);
    job.setReducerClass(TestingReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setPartitionerClass(TestingPartioner.class);
    job.setNumReduceTasks(2);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:csc555.ebratt.depaul.edu.AverageScoreDriver.java

License:Open Source License

/**
 *
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows for
 * an optional combiner based on the third argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the AverageScoreReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("average score by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), AverageScoreDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer Classes to use
    job.setMapperClass(AverageScoreMapper.class);
    job.setReducerClass(AverageScoreReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreReducer.class);
    }

    // The Jar file to run
    job.setJarByClass(AverageScoreDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}

From source file:csc555.ebratt.depaul.edu.AverageScoreRankerDriver.java

License:Open Source License

/**
 *
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows for
 * an optional combiner based on the third argument.
 * 
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use the AverageScoreRankerReducer.class as the
 *            combiner.
 * @throws Exception
 *             if there is an issue with any of the arguments
 * 
 */
@Override
public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    job.setJobName("average score ranked");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setNumReduceTasks(1);

    // Mapper and Reducer Classes to use
    job.setMapperClass(AverageScoreRankerMapper.class);
    job.setReducerClass(AverageScoreRankerReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreRankerReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The Jar file to run
    job.setJarByClass(AverageScoreRankerDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);

    return 0;
}