Example usage for org.apache.hadoop.mapred JobConf getNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf#getNumReduceTasks().

Prototype

public int getNumReduceTasks() 

Document

Get the configured number of reduce tasks for this job.
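
A minimal, self-contained sketch of the call (illustrative only; the class name GetNumReduceTasksExample and the chosen reduce count are made up and not taken from the projects below):

import org.apache.hadoop.mapred.JobConf;

public class GetNumReduceTasksExample {
    public static void main(String[] args) {
        // Plain JobConf; in a real driver this would carry the full job configuration.
        JobConf conf = new JobConf();
        conf.setNumReduceTasks(4);

        // getNumReduceTasks() returns the configured number of reduce tasks (the default is 1).
        int reduces = conf.getNumReduceTasks();

        // A common idiom in the examples below: zero reduce tasks means a map-only job.
        boolean isMapOnly = reduces == 0;

        System.out.println("Reduce tasks: " + reduces + ", map-only: " + isMapOnly);
    }
}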

Usage

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private int getNumReduceTasks(JobConf jobConf) {
    int numReduceTasks = Math.min(maxReducers, jobConf.getNumReduceTasks());
    return numReduceTasks;
}

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private IOperatorDescriptor addReducer(IOperatorDescriptor previousOperator, JobConf jobConf,
        JobSpecification spec) throws Exception {
    IOperatorDescriptor mrOutputOperator = previousOperator;
    if (jobConf.getNumReduceTasks() != 0) {
        IOperatorDescriptor sorter = getExternalSorter(jobConf, spec);
        HadoopReducerOperatorDescriptor reducer = getReducer(jobConf, spec, false);
        int numReduceTasks = getNumReduceTasks(jobConf);
        configurePartitionCountConstraint(spec, sorter, numReduceTasks);
        configurePartitionCountConstraint(spec, reducer, numReduceTasks);

        IConnectorDescriptor mToNConnectorDescriptor = getMtoNHashPartitioningConnector(jobConf, spec);
        spec.connect(mToNConnectorDescriptor, previousOperator, 0, sorter, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, reducer, 0);
        mrOutputOperator = reducer;
    }
    return mrOutputOperator;
}

From source file: edu.ucsb.cs.knn.KnnDriver.java

License: Apache License

/**
 * Submit the configured job to Hadoop JobTracker to start the process.
 */
public static void run(JobConf job) throws IOException {

    job.setJarByClass(KnnDriver.class); // This method sets the jar
    String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Tasks:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Tasks: " + job.getNumReduceTasks() + "\n";
    ret += "  Threshold:    " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
    System.out.println(ret);
    //
    // run job
    //
    JobClient.runJob(job);
}

From source file: edu.ucsb.cs.utilities.JobSubmitter.java

License: Apache License

public static void run(JobConf job, String title, float Threshold) {

    String ret = stars() + "\n [" + title + "]\n" + stars() + "\n  Running job:  " + job.getJobName()
            + "\n  Input Path:   {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path:  " + FileOutputFormat.getOutputPath(job) + "\n" + "  Num. of mappers: "
            + job.getNumMapTasks() + "\n" + "  Num. of reducers: " + job.getNumReduceTasks() + "\n";
    if (Threshold != -1)
        ret += "  Threshold: " + Threshold + "\n";
    //      for (int ctr = 0; ctr < Properties.requiredParameters.size(); ctr++)//check
    //         ret += Properties.requiredParameters.get(ctr) + "\n";
    System.out.println(ret);
    try {
        Date startTime = new Date();
        JobClient.runJob(job);
        Date end_time = new Date();
        System.err.println(
                "Job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");

    } catch (IOException e) {
        System.err.println("ERROR: While submitting the job :(");
        e.printStackTrace();
    }
}

From source file: edu.umn.cs.spatialHadoop.core.GridPartitioner.java

License: Open Source License

/**
 * Initializes a grid partitioner for a given set of input files.
 * @param inFiles input files to partition
 * @param job job configuration
 */
public GridPartitioner(Path[] inFiles, JobConf job) {
    Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
    this.gridInfo = new GridInfo(inMBR.x1, inMBR.y1, inMBR.x2, inMBR.y2);
    int numOfPartitions = job.getInt("m", job.getNumReduceTasks() * job.getNumReduceTasks() * 1000);
    this.gridInfo.calculateCellDimensions(numOfPartitions);
}

From source file: org.apache.avro.mapred.AvroMultipleOutputs.java

License: Apache License

/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the
 *                          default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}

From source file: org.apache.avro.mapred.AvroOutputFormat.java

License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    GenericData dataModel = AvroJob.createDataModel(job);

    final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null));

    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}

From source file: org.apache.avro.mapred.DelegatingMapper.java

License: Apache License

public void configure(JobConf conf) {
    this.conf = conf;
    this.isMapOnly = conf.getNumReduceTasks() == 0;
}

From source file: org.apache.avro.mapred.HadoopMapper.java

License: Apache License

@Override
@SuppressWarnings("unchecked")
public void configure(JobConf conf) {
    this.mapper = ReflectionUtils.newInstance(conf.getClass(AvroJob.MAPPER, AvroMapper.class, AvroMapper.class),
            conf);
    this.isMapOnly = conf.getNumReduceTasks() == 0;
}

From source file: org.apache.ignite.internal.processors.hadoop.GridHadoopUtils.java

License: Apache License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces,
            props);
}