List of usage examples for org.apache.hadoop.mapred JobConf.setNumReduceTasks. The method sets the number of reduce tasks for the job; a value of 0 configures a map-only job, in which map output is written directly by the job's output format.

public void setNumReduceTasks(int n)
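Before the collected examples, here is a minimal, self-contained sketch of the call in isolation. It is not drawn from any of the projects below; the class name SetNumReduceTasksExample and the use of the identity mapper/reducer are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(SetNumReduceTasksExample.class);
        job.setJobName("setNumReduceTasks-example");

        // Identity mapper/reducer: records pass through unchanged.
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run exactly four reduce tasks, producing four part files;
        // setNumReduceTasks(0) would make the job map-only.
        job.setNumReduceTasks(4);

        JobClient.runJob(job);
    }
}

As the examples below illustrate, the reduce-task count is typically taken from a command-line argument (Integer.parseInt(args[i])) or set to 0 for map-only preprocessing jobs.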
From source file: edu.uci.ics.hyracks.imru.util.DataBalancer.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DataBalancer.class);
    job.setJobName(DataBalancer.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    if (args.length > 3) {
        if (args[3].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[3].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.core.util.DataGenerator.java
License: Apache License

public static void main(String[] args) throws IOException {
    // Job 1: compute the maximum vertex ID with a single reducer.
    JobConf job = new JobConf(DataGenerator.class);
    FileSystem dfs = FileSystem.get(job);
    String maxFile = "/maxtemp";
    dfs.delete(new Path(maxFile), true);
    job.setJobName(DataGenerator.class.getSimpleName() + "max ID");
    job.setMapperClass(MapMaxId.class);
    job.setCombinerClass(CombineMaxId.class);
    job.setReducerClass(ReduceMaxId.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(maxFile));
    job.setNumReduceTasks(1);
    JobClient.runJob(job);

    // Job 2: generate records, with the reducer count taken from args[3].
    job = new JobConf(DataGenerator.class);
    job.set("hyracks.maxid.file", maxFile);
    job.setInt("hyracks.x", Integer.parseInt(args[2]));
    dfs.delete(new Path(args[1]), true);
    job.setJobName(DataGenerator.class.getSimpleName());
    job.setMapperClass(MapRecordGen.class);
    job.setReducerClass(ReduceRecordGen.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[3]));
    if (args.length > 4) {
        if (args[4].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[4].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.CommonSource.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.DuplicateGraph.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DuplicateGraph.class);
    job.setJobName(DuplicateGraph.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(0); // map-only job
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.FilterCount.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(FilterCount.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));
    job.setNumReduceTasks(0); // map-only job
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.FindLargest.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setCombinerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.GraphPreProcessor.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(0);
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.VertexAggregator.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexAggregator.class);
    job.setJobName(VertexAggregator.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setCombinerClass(CombineRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}
From source file: edu.uci.ics.pregelix.example.utils.VertexSorter.java
License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexSorter.class);
    job.setJobName(VertexSorter.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}
From source file: edu.ucsb.cs.hybrid.HybridDriver.java
License: Apache License

public static void main(String[] args) throws ParseException, IOException {
    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");
    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0); // map-only job
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    job.setBoolean("fs.hdfs.impl.disable.cache", true); // xun: not sure if needed

    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitMB);
        job.setLong("mapred.max.split.size", splitMB);
        job.setLong("dfs.block.size", splitMB);
    } else {
        // For the www experiments, comment out the Splitter call below:
        // it assumes partitions are not split for load balancing. Should be fixed.
        Splitter.configure(job, inputPath);
        job.setInputFormat(NonSplitableSequenceInputFormat.class);
    }

    // SIGIR'14 two-stage load balancing (not yet fully incorporated).
    if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) {
        TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE),
                new Path(PartDriver.OUTPUT_DIR), job);
    }

    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));

    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE))
        IDMappingJob(args);
}