Example usage for org.apache.hadoop.mapred JobConf getNumReduceTasks

List of usage examples for org.apache.hadoop.mapred JobConf getNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getNumReduceTasks().

Prototype

public int getNumReduceTasks() 

Source Link

Document

Get the configured number of reduce tasks for this job.
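A minimal sketch of calling the method (the reduce count set below is an illustrative assumption; getNumReduceTasks() returns 1 when nothing has been configured):

import org.apache.hadoop.mapred.JobConf;

public class GetNumReduceTasksExample {
    public static void main(String[] args) {
        // Build a job configuration and set an illustrative reduce count.
        JobConf conf = new JobConf();
        conf.setNumReduceTasks(4);

        // getNumReduceTasks() returns the configured number of reduce tasks.
        int reduces = conf.getNumReduceTasks();

        // A value of 0 indicates a map-only job.
        boolean mapOnly = (reduces == 0);
        System.out.println("Reduce tasks: " + reduces + ", map-only: " + mapOnly);
    }
}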

Usage

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopUtils.java

License:Open Source License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws GridException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws GridException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces,
            props);
}

From source file:org.macau.util.FuzzyJoinDriver.java

License:Apache License

/**
 * Runs the job and prints basic information about it:
 * the start time, the finish time, and the running time
 * (finish time minus start time).
 *
 * @param job the job configuration to run
 * @throws IOException
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Jobs:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + "  Properties:  {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n                ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}

From source file:org.mitre.bio.mapred.TotalSequenceLength.java

License:Open Source License

/**
 * Init the job with the given parameters and run it.
 *
 * @param jobConf   the hadoop job configuration
 * @param input     input {@link SequenceFile} path
 * @param output    output path (this will contain ONE part with the length)
 * @return zero if successful
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, TotalSequenceLength.class);
    conf.setJobName("TotalSequenceLength");

    // We can only handle one reducer
    if (conf.getNumReduceTasks() != 1) {
        conf.setNumReduceTasks(1);
        LOG.info("Setting number of reducers to ONE!");
    }

    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(SequenceMapClass.class);
    conf.setOutputKeyClass(IntWritable.class); // map output key class
    conf.setOutputValueClass(IntWritable.class); // map output value class

    conf.setCombinerClass(LengthReduceClass.class);
    conf.setReducerClass(LengthReduceClass.class);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    if (cleanLogs) {
        LOG.info("removing log directory");
        Path path = new Path(output, "_logs");
        FileSystem fs = path.getFileSystem(jobConf);
        fs.delete(path, true);
    }

    return 0;
}

From source file:org.pentaho.hadoop.mapreduce.MRUtil.java

License:Apache License

public static void passInformationToTransformation(final VariableSpace variableSpace, final JobConf job) {
    if (variableSpace != null && job != null) {
        variableSpace.setVariable("Internal.Hadoop.NumMapTasks", Integer.toString(job.getNumMapTasks()));
        variableSpace.setVariable("Internal.Hadoop.NumReduceTasks", Integer.toString(job.getNumReduceTasks()));
        String taskId = job.get("mapred.task.id");
        variableSpace.setVariable("Internal.Hadoop.TaskId", taskId);
        // TODO: Verify if the string range holds true for all Hadoop distributions
        // Extract the node number from the task ID.
        // The consensus currently is that it's the part after the last underscore.
        //
        // Examples:
        // job_201208090841_9999
        // job_201208090841_10000
        //
        String nodeNumber;
        if (Const.isEmpty(taskId)) {
            nodeNumber = "0";
        } else {
            int lastUnderscoreIndex = taskId.lastIndexOf("_");
            if (lastUnderscoreIndex >= 0) {
                nodeNumber = taskId.substring(lastUnderscoreIndex + 1);
            } else {
                nodeNumber = "0";
            }
        }
        // Strip any leading zeroes by parsing the node number as an integer.
        //
        variableSpace.setVariable("Internal.Hadoop.NodeNumber", Integer.toString(Integer.valueOf(nodeNumber)));
    }
}

From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample keys. By default reads 100,000 keys from 10
 * locations in the input, sorts them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf     the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraConstants.SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}

From source file:tap.core.MapperBridge.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf conf) {
    this.mapper = ReflectionUtils.newInstance(conf.getClass(Phase.MAPPER, TapMapper.class, TapMapper.class),
            conf);
    this.isMapOnly = conf.getNumReduceTasks() == 0;
    try {
        determineInputFormat(conf);
        determineOutputFormat(conf);
        this.groupBy = conf.get(Phase.GROUP_BY);
        this.sortBy = conf.get(Phase.SORT_BY);
    } catch (Exception e) {
        if (e instanceof RuntimeException)
            throw (RuntimeException) e;
        throw new RuntimeException(e);
    }

    mapper.setConf(conf);
    mapper.init(conf.get("map.input.file"));
}