Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf.setNumReduceTasks, collected from open-source projects.

Prototype

public void setNumReduceTasks(int n) 

Document

Set the requisite number of reduce tasks for this job.
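
Before the project examples below, here is a minimal, self-contained sketch of the two common settings: a fixed reducer count, which also fixes the number of reduce output partitions, and zero reducers for a map-only job. The class name and argument handling are placeholders for illustration, not taken from any project on this page.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumReduceTasksSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumReduceTasksSketch.class);
        conf.setJobName("setNumReduceTasks-sketch");

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // The job runs exactly eight reduce tasks, so the output directory
        // will contain eight part-* files, one per reducer.
        conf.setNumReduceTasks(8);

        // Passing 0 instead skips the shuffle, sort, and reduce phases
        // entirely (a map-only job); mapper output is written directly:
        // conf.setNumReduceTasks(0);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}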

Usage

From source file:BMTTableLoader.java

License:Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setJobName("BMTTableLoader");
    conf.setMapperClass(Map.class);
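    // 0 reduce tasks below makes this a map-only job: mapper output is written directly to the output path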
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // use the arguments left over after GenericOptionsParser consumes the generic Hadoop options
    String[] remainingArgs = parser.getRemainingArgs();
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        other_args.add(remainingArgs[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file:NaivePageRank.java

License:Apache License

public static void main(String[] args) throws Exception {
    int iteration = -1;
    String inputPath = args[0];
    String outputPath = args[1];
    int specIteration = 0;
    if (args.length > 2) {
        specIteration = Integer.parseInt(args[2]);
    }
    int numNodes = 100000;
    if (args.length > 3) {
        numNodes = Integer.parseInt(args[3]);
    }
    int numReducers = 32;
    if (args.length > 4) {
        numReducers = Integer.parseInt(args[4]);
    }
    System.out.println("specified iteration: " + specIteration);
    long start = System.currentTimeMillis();

    /**
     * job to count out-going links for each url
     */
    JobConf conf = new JobConf(NaivePageRank.class);
    conf.setJobName("PageRank-Count");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(CountMapper.class);
    conf.setReducerClass(CountReducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/count"));
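    // numReducers comes from the optional fifth command-line argument (defaults to 32)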
    conf.setNumReduceTasks(numReducers);
    JobClient.runJob(conf);

    /******************** Initial Rank Assignment Job ***********************/
    conf = new JobConf(NaivePageRank.class);
    conf.setJobName("PageRank-Initialize");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(InitialRankAssignmentMapper.class);
    conf.setReducerClass(InitialRankAssignmentReducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
    conf.setNumReduceTasks(numReducers);
    // conf.setIterative(false);
    JobClient.runJob(conf);
    iteration++;

    do {
        /****************** Join Job ********************************/
        conf = new JobConf(NaivePageRank.class);
        conf.setJobName("PageRank-Join");
        conf.setOutputKeyClass(Text.class);
        // conf.setOutputValueClass(Text.class);
        conf.setMapperClass(ComputeRankMap.class);
        conf.setReducerClass(ComputeRankReduce.class);
        conf.setMapOutputKeyClass(TextPair.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setPartitionerClass(FirstPartitioner.class);
        conf.setOutputKeyComparatorClass(KeyComparator.class);
        conf.setOutputValueGroupingComparator(GroupComparator.class);

        // relation table
        FileInputFormat.setInputPaths(conf, new Path(inputPath));
        // rank table
        FileInputFormat.addInputPath(conf, new Path(outputPath + "/i" + (iteration - 1)));
        // count table
        FileInputFormat.addInputPath(conf, new Path(outputPath + "/count"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
        conf.setNumReduceTasks(numReducers);
        JobClient.runJob(conf);
        iteration++;

        /******************** Rank Aggregate Job ***********************/
        conf = new JobConf(NaivePageRank.class);
        conf.setJobName("PageRank-Aggregate");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapperClass(RankAggregateMapper.class);
        conf.setReducerClass(RankAggregateReducer.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(outputPath + "/i" + (iteration - 1)));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
        conf.setNumReduceTasks(numReducers);
        conf.setInt("haloop.num.nodes", numNodes);
        JobClient.runJob(conf);
        iteration++;
    } while (iteration < 2 * specIteration);

    long end = System.currentTimeMillis();
    System.out.println("running time " + (end - start) / 1000 + "s");
}

From source file:RepackWikipedia.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location")
            .create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file")
            .create(MAPPING_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("block|record|none").hasArg()
            .withDescription("compression type").create(COMPRESSION_TYPE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de").hasArg().withDescription("two-letter language code")
            .create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION);

    if (!"block".equals(compressionType) && !"record".equals(compressionType)
            && !"none".equals(compressionType)) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    // this is the default block size
    int blocksize = 1000000;

    //Job job = Job.getInstance(getConf());
    JobConf conf = new JobConf(RepackWikipedia.class);
    conf.setJarByClass(RepackWikipedia.class);
    conf.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

    conf.set(DOCNO_MAPPING_FIELD, mappingFile);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - XML dump file: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - compression type: " + compressionType);
    LOG.info(" - language: " + language);

    if ("block".equals(compressionType)) {
        LOG.info(" - block size: " + blocksize);
    }

    // map-only job: each mapper writes repacked pages straight to the SequenceFile output
    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    if ("none".equals(compressionType)) {
        FileOutputFormat.setCompressOutput(conf, false);
    } else {
        FileOutputFormat.setCompressOutput(conf, true);

        if ("record".equals(compressionType)) {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
            conf.setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WikipediaPage.class);

    conf.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    //job.waitForCompletion(true);
    JobClient.runJob(conf);

    return 0;
}

From source file:FormatStorage2ColumnStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatStorage2ColumnStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorage2ColumnStorageMR");

    conf.setNumMapTasks(1); // only a hint to the framework; the actual map count follows the input splits
    conf.setNumReduceTasks(4); // exact: the job runs four reduce tasks

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = null;
    rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file:TestColumnStorageInputFormat.java

License:Open Source License

public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);

        conf.setJobName("TestColumnStorageInputFormat");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "flase");

        conf.set("mapred.input.dir", argv[0]);

        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.println("split:" + i + " offset:" + split.getStart() + " len:" + split.getLength()
                    + " path:" + split.getPath().toString() + " beginLine:" + split.getBeginLine()
                    + " endLine:" + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;

                key = currRecReader.createKey();
                value = currRecReader.createValue();

                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {

                    Record record = (Record) value;

                    Object row = serDe.deserialize(record);
                    count++;

                }
                end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }

            System.out.println("total delay:" + totalDelay + "\n");
        }

    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}

From source file:BP.java

License:Apache License

protected JobConf configInitMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Init_Belief");

    conf.setMapperClass(MapInitializeBelief.class);

    fs.delete(message_cur_path, true);

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, message_cur_path);

    // map-only: only a mapper is configured, so initial beliefs are written without a reduce phase
    conf.setNumReduceTasks(0);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configUpdateMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Update_message");

    conf.setMapperClass(MapUpdateMessage.class);
    conf.setReducerClass(RedUpdateMessage.class);

    fs.delete(message_next_path, true);

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, message_next_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configCheckErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Check Err");

    fs.delete(check_error_path, true);

    conf.setMapperClass(MapCheckErr.class);
    conf.setReducerClass(RedCheckErr.class);

    FileInputFormat.setInputPaths(conf, message_cur_path, message_next_path);
    FileOutputFormat.setOutputPath(conf, check_error_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configSumErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Sum Err");

    fs.delete(sum_error_path, true);

    conf.setMapperClass(MapSumErr.class);
    conf.setReducerClass(RedSumErr.class);

    FileInputFormat.setInputPaths(conf, check_error_path);
    FileOutputFormat.setOutputPath(conf, sum_error_path);

    // a single reducer aggregates every partial error into one global sum
    conf.setNumReduceTasks(1);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configComputeBelief() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Compute_Belief");

    conf.setMapperClass(MapComputeBelief.class);
    conf.setReducerClass(RedComputeBelief.class);

    fs.delete(output_path, true);

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}