Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Source Link

Document

Set the number of reduce tasks for the job.

Usage

From source file:BigramRelativeFrequencyTuple.java

License:Apache License

/**
 * Runs this tool.
 *
 * <p>Expects exactly three arguments: the input path, the output path and the
 * number of reduce tasks. Any existing output directory is deleted before the
 * job is submitted.
 *
 * @param args command-line arguments: {@code <input> <output> <num reducers>}
 * @return 0 if the job succeeded, 1 if the job failed, -1 on a usage error
 * @throws Exception if the reducer count is not an integer, or if configuring,
 *         submitting or waiting for the job fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int reduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyTuple.class);

    job.setNumReduceTasks(reduceTasks);

    // Built once and reused for both the job configuration and the cleanup below.
    Path outputDir = new Path(outputPath);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(BinSedesTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(BinSedesTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Report job failure to the caller instead of always claiming success.
    return succeeded ? 0 : 1;
}

From source file:accismus.benchmark.Generator.java

License:Apache License

/**
 * Configures and runs a job with zero reduce tasks that reads from
 * {@code DocumentInputFormat}, maps with {@code GMapper} and writes through
 * {@code AccismusOutputFormat}.
 *
 * @param args {@code <props file> <num task> <num documents>}; the properties
 *        file is loaded and handed to {@code AccismusOutputFormat.configure}
 * @return 0 if the job succeeded, 1 on a usage error or if the job failed
 * @throws Exception if the numeric arguments cannot be parsed, the properties
 *         file cannot be read, or the job cannot be submitted or awaited
 */
@Override
public int run(String[] args) throws Exception {

    if (args.length != 3) {
        System.err.println(
                "Usage : " + this.getClass().getSimpleName() + " <props file> <num task> <num documents>");
        return 1;
    }

    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(DocumentInputFormat.class);

    DocumentInputFormat.setNumTask(job, Integer.parseInt(args[1]), Integer.parseInt(args[2]));

    job.setMapOutputKeyClass(Loader.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setMapperClass(GMapper.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccismusOutputFormat.class);

    Properties accisumusProps = new Properties();
    // Close the reader as soon as the properties are loaded so the file handle is not leaked.
    try (FileReader propsReader = new FileReader(args[0])) {
        accisumusProps.load(propsReader);
    }
    AccismusOutputFormat.configure(job, accisumusProps);

    // Surface job failure through the exit status instead of always returning 0.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:adts.CreateCorrespondences.java

License:Open Source License

/**
 * Runs the "CreateCorrespondences" job: a table mapper over the HBase table
 * "content" feeding a single reduce task, with output written to the path
 * given as the first argument.
 *
 * @param args {@code args[0]} is the job output path
 * @throws Exception if job setup fails; an {@link IOException} is thrown when
 *         the job itself does not complete successfully
 */
public static void main(String[] args) throws Exception {
    Job correspondencesJob = new Job(HBaseConfiguration.create(), "CreateCorrespondences");
    correspondencesJob.setJarByClass(CreateCorrespondences.class);

    Scan tableScan = new Scan();
    // The Scan default caching of 1 would be bad for MapReduce jobs.
    tableScan.setCaching(500);
    // Block caching must stay off for MR jobs.
    tableScan.setCacheBlocks(false);

    // Arguments: input table name, scan, mapper, mapper output key, mapper output value, job.
    TableMapReduceUtil.initTableMapperJob("content", tableScan, Map.class, Text.class, Text.class,
            correspondencesJob);

    correspondencesJob.setReducerClass(Reduce.class);
    correspondencesJob.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(correspondencesJob, new Path(args[0]));

    if (correspondencesJob.waitForCompletion(true)) {
        return;
    }
    throw new IOException("error with job!");
}

From source file:adts.PopularKeywords.java

License:Open Source License

/**
 * Runs two chained MapReduce jobs, both named "PopularKeywords".
 *
 * <p>The first job reads two text inputs (the queries path mapped by
 * {@code Map}, the stop-words path mapped by {@code StopwordsMap}), reduces
 * with {@code Reduce} and writes to the intermediate directory
 * {@code /root/temporary}. The second job reads that directory, applies
 * {@code ReverseMap}/{@code ReverseReduce} with a decreasing
 * {@code LongWritable} sort comparator and a single reduce task, and writes
 * to the final output path.
 *
 * <p>The process exits with status 1 as soon as either job fails; the second
 * job is not started when the first one fails.
 *
 * @param args {@code args[0]} queries input path, {@code args[1]} stop-words
 *        input path, {@code args[2]} final output path
 * @throws Exception if configuring, submitting or waiting for a job fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Hand-off location between the two jobs.
    Path intermediatePath = new Path("/root/temporary");

    Job job = new Job(conf, "PopularKeywords");
    job.setJarByClass(PopularKeywords.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path stopWordsInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class);
    MultipleInputs.addInputPath(job, stopWordsInputPath, TextInputFormat.class, StopwordsMap.class);

    FileOutputFormat.setOutputPath(job, intermediatePath);

    // Do not sort missing or partial intermediate output if the first job failed.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    Job sortingJob = new Job(conf, "PopularKeywords");
    sortingJob.setJarByClass(PopularKeywords.class);

    sortingJob.setOutputKeyClass(Text.class);
    sortingJob.setOutputValueClass(LongWritable.class);

    sortingJob.setMapperClass(ReverseMap.class);
    sortingJob.setReducerClass(ReverseReduce.class);

    sortingJob.setInputFormatClass(TextInputFormat.class);
    sortingJob.setOutputFormatClass(TextOutputFormat.class);
    sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortingJob.setMapOutputKeyClass(LongWritable.class);
    sortingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sortingJob, intermediatePath);
    FileOutputFormat.setOutputPath(sortingJob, new Path(args[2]));

    sortingJob.setNumReduceTasks(1);

    // Propagate the outcome of the final job through the process exit status.
    System.exit(sortingJob.waitForCompletion(true) ? 0 : 1);
}

From source file:adts.PrepareInput.java

License:Open Source License

/**
 * Runs the "PrepareInput" job: text input mapped by {@code Map}, reduced by
 * {@code Reduce} in a single reduce task, and written as text output.
 *
 * <p>The process exits with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path
 * @throws Exception if configuring, submitting or waiting for the job fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "PrepareInput");
    job.setJarByClass(PrepareInput.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(1);

    // Report job failure through the exit status instead of silently exiting 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:AllLab_Skeleton.Lab2.Lab2SecondarySort.java

/**
 * Configures and runs the "SecondarySort" job with a custom partitioner,
 * a grouping comparator and eight reduce tasks.
 *
 * <p>The process exits with status 0 when the job succeeds and 1 when the job
 * fails or an exception is raised while setting it up or waiting for it.
 *
 * @param args the command line arguments: {@code args[0]} input path,
 *        {@code args[1]} output path
 */
public static void main(String[] args) {

    // Assume failure until the job reports success.
    int exitCode = 1;
    try {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "SecondarySort");
        job.setJarByClass(Lab2SecondarySort.class);

        job.setMapperClass(Lab2Mapper.class);
        job.setMapOutputKeyClass(CompositeKeyWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setPartitionerClass(Lab2Partitioner.class);
        job.setGroupingComparatorClass(Lab2GroupComparator.class);

        job.setReducerClass(Lab2Reducer.class);
        job.setOutputKeyClass(CompositeKeyWritable.class);
        job.setOutputValueClass(NullWritable.class);

        job.setNumReduceTasks(8);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        exitCode = job.waitForCompletion(true) ? 0 : 1;

    } catch (InterruptedException ex) {
        // Restore the interrupt flag so it is not lost.
        Thread.currentThread().interrupt();
        System.err.println("Error Message: " + ex.getMessage());
    } catch (IOException | ClassNotFoundException ex) {
        System.err.println("Error Message: " + ex.getMessage());
    }
    // An exception no longer results in a successful (0) exit status.
    System.exit(exitCode);
}

From source file:AllLab_Skeleton.Lab6.BloomFilterBhavesh.java

/**
 * Runs the "Bloom Filter" job with {@code BloomFilterMapper} and zero reduce
 * tasks, then prints whether the job completed successfully.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path
 * @throws Exception if configuring, submitting or waiting for the job fails
 */
public static void main(String[] args) throws Exception {
    Job bloomJob = Job.getInstance(new Configuration(), "Bloom Filter");
    bloomJob.setJarByClass(BloomFilterBhavesh.class);

    // Mapper and its output types; no reducers are scheduled.
    bloomJob.setMapperClass(BloomFilterMapper.class);
    bloomJob.setMapOutputKeyClass(Text.class);
    bloomJob.setMapOutputValueClass(NullWritable.class);
    bloomJob.setNumReduceTasks(0);

    Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(bloomJob, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(bloomJob, outputPath);

    System.out.println(bloomJob.waitForCompletion(true));
}

From source file:AllLab_Skeleton.Lab6.BloomFilterUsingDistributedCache.java

/**
 * Runs the "Bloom Filter" job with {@code BloomFilterMapper} and zero reduce
 * tasks, registering one extra file as a job cache file.
 *
 * <p>The process exits with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path,
 *        {@code args[2]} URI of the file to add to the job's cache files
 * @throws Exception if the cache URI is malformed, or if configuring,
 *         submitting or waiting for the job fails
 */
public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "Bloom Filter");
    job.setJarByClass(BloomFilterUsingDistributedCache.class);
    job.setMapperClass(BloomFilterMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Register the cache file through the Job API rather than the deprecated DistributedCache helper.
    job.addCacheFile(new URI(args[2]));
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Report job failure through the exit status instead of silently exiting 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:Analysis.A2_Top_20_Most_Popular_Artists.Top_20_Most_Popular_Artist_Driver.java

/**
 * Configures and runs the "Top 20 most popular artist " job with a mapper,
 * a combiner and a single reduce task, then exits with status 0 on success
 * and 1 on failure.
 *
 * @param args the command line arguments: {@code args[0]} input path,
 *        {@code args[1]} output path
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job artistJob = Job.getInstance(new Configuration(), "Top 20 most popular artist ");
    artistJob.setJarByClass(Top_20_Most_Popular_Artist_Driver.class);

    // Map side.
    artistJob.setMapperClass(Top_20_Most_Popular_Artist_Mapper.class);
    artistJob.setMapOutputKeyClass(Text.class);
    artistJob.setMapOutputValueClass(IntWritable.class);

    // Combine / reduce side: everything funnels into one reduce task.
    artistJob.setCombinerClass(Top_20_Most_Popular_Artist_Combiner.class);
    artistJob.setReducerClass(Top_20_Most_Popular_Artist_Reducer.class);
    artistJob.setNumReduceTasks(1);
    artistJob.setOutputKeyClass(NullWritable.class);
    artistJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(artistJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(artistJob, new Path(args[1]));

    boolean succeeded = artistJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Analysis.A5_Min_Max_Median_Age_Top_Countries.Min_Max_Age_By_Country_Driver.java

/**
 * Configures and runs the "Min, Max, Average, Median age of users by country"
 * job with a single reduce task, then exits with status 0 on success and 1 on
 * failure.
 *
 * @param args the command line arguments: {@code args[0]} input path,
 *        {@code args[1]} output path
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job ageStatsJob = Job.getInstance(new Configuration(),
            "Min, Max, Average, Median age of users by country");
    ageStatsJob.setJarByClass(Min_Max_Age_By_Country_Driver.class);

    // Map side emits Text keys with IntWritable values.
    ageStatsJob.setMapperClass(Min_Max_Age_By_Country_Mapper.class);
    ageStatsJob.setMapOutputKeyClass(Text.class);
    ageStatsJob.setMapOutputValueClass(IntWritable.class);

    // Reduce side emits Text keys with Text values from one reduce task.
    ageStatsJob.setReducerClass(Min_Max_Age_By_Country_Reducer.class);
    ageStatsJob.setNumReduceTasks(1);
    ageStatsJob.setOutputKeyClass(Text.class);
    ageStatsJob.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(ageStatsJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(ageStatsJob, new Path(args[1]));

    int exitCode = ageStatsJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}