Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Source Link

Document

Set the number of reduce tasks for the job.

Usage

From source file:PartitionGraph.java

License:Apache License

/**
 * Runs this tool./*from  w  w w  .  ja  v  a2s.  com*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:CountJob.java

License:Apache License

/**
 * Runs a word-count job over {@code args[0]} followed by a sort job that orders the counts by
 * decreasing frequency into a single output file.
 *
 * <p>Intermediate counts go to {@code /data/output/temp/<param>1} and the sorted result to
 * {@code /data/output/temp/<param>2}. The intermediate directory is removed once the sort job
 * has been attempted.
 *
 * @param param suffix used to build the two temporary output paths
 * @param args  {@code args[0]} is the input path and {@code args[2]} is the value stored under
 *              {@code TokenizerMapper.PATTERN}
 * @param msgs  not used by this method
 * @throws IOException            on file system errors, or if either job reports failure
 * @throws ClassNotFoundException propagated from job submission
 * @throws InterruptedException   if waiting for a job is interrupted
 */
public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    // Clear leftovers from a previous run so neither job fails on an existing output directory.
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    // Job.getInstance replaces the deprecated Job constructors.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    // Do not sort the output of a failed count job.
    if (!job.waitForCompletion(true)) {
        throw new IOException("word count job failed; output expected at " + tempOutput1);
    }

    Job sortJob1 = Job.getInstance(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    try {
        if (!sortJob1.waitForCompletion(true)) {
            throw new IOException("grep-sort job failed; output expected at " + tempOutput2);
        }
    } finally {
        // The intermediate counts are no longer needed, whether or not the sort succeeded.
        hdfs.delete(tempOutput1, true);
    }
}

From source file:lab2_3.java

/**
 * Chains two jobs: the first writes its partitioned output to TMPDIR, and the second, started
 * only if the first succeeds, reads TMPDIR and writes to {@code args[1]}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws Exception if the file system or either job raises an error
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Start from a clean slate: drop the final output and the intermediate directory.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);
    fs.delete(TMPDIR, true);

    Job partitionJob = Job.getInstance(conf, "drive time lab 2.2");
    partitionJob.setJarByClass(lab2_3.class);

    partitionJob.setMapperClass(lab2_2.PartitioningMapper.class);
    partitionJob.setPartitionerClass(lab2_2.TypePartitioner.class);
    partitionJob.setReducerClass(lab2_2.IdentityReducer.class);
    partitionJob.setNumReduceTasks(6);

    partitionJob.setOutputKeyClass(IntWritable.class);
    partitionJob.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(partitionJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(partitionJob, TMPDIR);

    // Non-zero unless both stages complete successfully.
    int exitCode = 1;
    if (partitionJob.waitForCompletion(true)) {
        Job statsJob = Job.getInstance(conf, "drive time lab 2.3");
        statsJob.setJarByClass(lab2_3.class);

        statsJob.setMapperClass(MMMaper.class);
        statsJob.setReducerClass(Reeeducer.class);
        statsJob.setPartitionerClass(Partitioneeeer.class);
        statsJob.setNumReduceTasks(6);

        statsJob.setOutputKeyClass(Text.class);
        statsJob.setOutputValueClass(lab2_1.Statistics.class);

        FileInputFormat.addInputPath(statsJob, TMPDIR);
        FileOutputFormat.setOutputPath(statsJob, new Path(args[1]));

        exitCode = statsJob.waitForCompletion(true) ? 0 : 1;
    }

    // TMPDIR is intentionally left in place after the run (cleanup is disabled).
    System.exit(exitCode);
}

From source file:BuildPersonalizedPageRankRecords.java

License:Apache License

/**
 * Runs this tool: parses the command line and runs a job with zero reduce tasks that reads
 * text input and writes sequence-file records.
 *
 * @param args command-line arguments; the INPUT, OUTPUT, NUM_NODES and SOURCES options are all
 *             required
 * @return -1 when the arguments cannot be parsed or a required option is missing, 0 when the
 *         job succeeds, 1 when the job fails
 * @throws Exception if NUM_NODES is not an integer or the job cannot be set up or run
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    // TODO: parse more than one integer for the sources later.
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("source of pagerank").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // Every option is mandatory; print usage and bail out if any is absent.
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    // TODO: change to an array later; the raw option string is passed through for now.
    String src = cmdline.getOptionValue(SOURCES);

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    // Zero reduce tasks: only the mapper configured below runs.
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    // Propagate the job outcome instead of unconditionally reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:RunPersonalizedPageRankBasic.java

License:Apache License

/**
 * Runs phase 1 of one iteration: reads {@code basePath/iter<i>} and writes
 * {@code basePath/iter<j>}, using as many reduce tasks as there are part files in the input.
 *
 * <p>Note: writing and reading back a separate "-mass" output is currently disabled in this
 * variant, so nothing is returned.
 *
 * @param i                   index of the input iteration directory
 * @param j                   index of the output iteration directory
 * @param basePath            directory holding the per-iteration directories
 * @param numNodes            value stored in the job configuration under "NodeCount"
 * @param useCombiner         whether to register CombineClass as the combiner
 * @param useInMapperCombiner only logged; MapClass is always used as the mapper
 * @throws Exception if the file system or the job raises an error
 */
private void phase1(int i, int j, String basePath, int numNodes, boolean useCombiner,
        boolean useInMapperCombiner) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    String inputDir = basePath + "/iter" + formatter.format(i);
    String outputDir = basePath + "/iter" + formatter.format(j);

    FileSystem fs = FileSystem.get(getConf());

    // Count the part files explicitly: the directory may also contain entries such as _log.
    int partFileCount = 0;
    for (FileStatus status : fs.listStatus(new Path(inputDir))) {
        String fileName = status.getPath().getName();
        if (fileName.contains("part-")) {
            partFileCount++;
        }
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + inputDir);
    LOG.info(" - output: " + outputDir);
    LOG.info(" - nodeCnt: " + numNodes);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInMapperCombiner);
    LOG.info("computed number of partitions: " + partFileCount);

    Configuration jobConf = job.getConfiguration();
    jobConf.setInt("NodeCount", numNodes);
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    // One reduce task per input partition.
    job.setNumReduceTasks(partFileCount);

    FileInputFormat.setInputPaths(job, new Path(inputDir));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MapClass.class);
    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }
    job.setReducerClass(ReduceClass.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(outputDir), true);

    long startedAt = System.currentTimeMillis();
    job.waitForCompletion(true);
    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
    System.out.println("Job Finished in " + elapsedSeconds + " seconds");
}

From source file:inMapperStripes.java

License:Apache License

/**
 * Runs this tool: parses the command line, configures the job and waits for it to finish.
 *
 * @param args command-line arguments; INPUT and OUTPUT are required, WINDOW defaults to 2 and
 *             NUM_REDUCERS defaults to 1
 * @return -1 when the arguments cannot be parsed or a required option is missing, 0 when the
 *         job succeeds, 1 when the job fails
 * @throws Exception if a numeric option is not an integer or the job cannot be set up or run
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // Only the two paths are mandatory; the numeric options fall back to defaults below.
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + inMapperStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(inMapperStripes.class.getSimpleName());
    job.setJarByClass(inMapperStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Propagate the job outcome instead of unconditionally reporting success.
    return succeeded ? 0 : 1;
}

From source file:RecordExtracting.java

License:Apache License

/**
 * Runs this tool./* ww  w .  j a v  a  2 s  . c o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    /* options.addOption(OptionBuilder.withArgName("path").hasArg()
         .withDescription("input path").create(INPUT));
     options.addOption(OptionBuilder.withArgName("path").hasArg()
         .withDescription("output path").create(OUTPUT));
     options.addOption(OptionBuilder.withArgName("num").hasArg()
         .withDescription("number of nodes").create(NUM_NODES));
     //parsing more than 1 integer later;*/
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("spamming users").create(SOURCES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /* if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)||!cmdline.hasOption(SOURCES)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
     }*/

    String inputPath = "xzzqskfinal/reviewsNew.txt";//cmdline.getOptionValue(INPUT);
    String outputPath = "xzzqskfinal/SpammingRecord";//cmdline.getOptionValue(OUTPUT);
    //int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    //Change to array later
    String src = cmdline.getOptionValue(SOURCES);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + RecordExtracting.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    //LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();

    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    //conf.setInt(NODE_CNT_FIELD, n);
    //more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(RatingSpamming.class.getSimpleName());
    job.setJarByClass(RecordExtracting.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    //job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:RunPageRankBasic.java

License:Apache License

/**
 * Runs phase 1 of one iteration: reads {@code basePath/iter<i>}, writes
 * {@code basePath/iter<j>t}, and combines the values found in the companion "-mass" directory.
 *
 * @param i                   index of the input iteration directory
 * @param j                   index of the output iteration directory
 * @param basePath            directory holding the per-iteration directories
 * @param numNodes            value stored in the job configuration under "NodeCount"
 * @param useCombiner         whether to register CombineClass as the combiner
 * @param useInMapperCombiner whether to use MapWithInMapperCombiningClass instead of MapClass
 * @return the result of folding the first float of every file under the "-mass" directory
 *         with {@code sumLogProbs}, starting from negative infinity
 * @throws Exception if the file system or the job raises an error
 */
private float phase1(int i, int j, String basePath, int numNodes, boolean useCombiner,
        boolean useInMapperCombiner) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankBasic.class);

    String in = basePath + "/iter" + formatter.format(i);
    String out = basePath + "/iter" + formatter.format(j) + "t";
    String outm = out + "-mass";

    // We need to actually count the number of part files to get the number of partitions (because
    // the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + numNodes);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInMapperCombiner);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().set("PageRankMassPath", outm);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(useInMapperCombiner ? MapWithInMapperCombiningClass.class : MapClass.class);

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    job.setReducerClass(ReduceClass.class);

    // Delete both output locations if they exist already.
    FileSystem.get(getConf()).delete(new Path(out), true);
    FileSystem.get(getConf()).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    FileSystem fs = FileSystem.get(getConf());
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        try {
            mass = sumLogProbs(mass, fin.readFloat());
        } finally {
            // Close the stream even when the read fails, so it is not leaked.
            fin.close();
        }
    }

    return mass;
}

From source file:RunPageRankBasic.java

License:Apache License

/**
 * Runs phase 2 of one iteration: a job with zero reduce tasks that reads
 * {@code basePath/iter<j>t} and writes {@code basePath/iter<j>}.
 *
 * @param i        not used by this method
 * @param j        index of the iteration whose directories are read and written
 * @param missing  value stored in the job configuration under "MissingMass"
 * @param basePath directory holding the per-iteration directories
 * @param numNodes value stored in the job configuration under "NodeCount"
 * @throws Exception if the file system or the job raises an error
 */
private void phase2(int i, int j, float missing, String basePath, int numNodes) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankBasic.class);

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + numNodes);

    String iterationDir = basePath + "/iter" + formatter.format(j);
    String inputDir = iterationDir + "t";
    String outputDir = iterationDir;

    LOG.info("PageRank: iteration " + j + ": Phase2");
    LOG.info(" - input: " + inputDir);
    LOG.info(" - output: " + outputDir);

    Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    jobConf.setFloat("MissingMass", missing);
    jobConf.setInt("NodeCount", numNodes);

    // Zero reduce tasks: only the mapper configured below runs.
    job.setNumReduceTasks(0);

    Path outputPath = new Path(outputDir);
    FileInputFormat.setInputPaths(job, new Path(inputDir));
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(outputPath, true);

    long startedAt = System.currentTimeMillis();
    job.waitForCompletion(true);
    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
    System.out.println("Job Finished in " + elapsedSeconds + " seconds");
}

From source file:lab2_2.java

/**
 * Configures and runs a single partitioning job with six reduce tasks, then exits with 0 on
 * success and 1 on failure.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if the file system or the job raises an error
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Delete the output directory if it exists already.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);

    // NOTE(review): the job name and jar class refer to lab 2.1 / lab2_1 although this
    // snippet comes from lab2_2 — confirm this is intended.
    Job partitionJob = Job.getInstance(conf, "drive time lab 2.1");
    partitionJob.setJarByClass(lab2_1.class);

    partitionJob.setMapperClass(PartitioningMapper.class);
    partitionJob.setPartitionerClass(TypePartitioner.class);
    partitionJob.setReducerClass(IdentityReducer.class);
    partitionJob.setNumReduceTasks(6);

    partitionJob.setOutputKeyClass(IntWritable.class);
    partitionJob.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(partitionJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(partitionJob, new Path(args[1]));

    boolean succeeded = partitionJob.waitForCompletion(true);
    int exitCode = succeeded ? 0 : 1;
    System.exit(exitCode);
}