Example usage for org.apache.hadoop.mapreduce MRJobConfig NUM_REDUCES

List of usage examples for org.apache.hadoop.mapreduce MRJobConfig NUM_REDUCES

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce MRJobConfig NUM_REDUCES.

Prototype

String NUM_REDUCES
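
NUM_REDUCES is the configuration key under which a job's number of reduce tasks is stored (the key string is "mapreduce.job.reduces" in current Hadoop releases). Below is a minimal sketch, assuming nothing beyond a plain Hadoop Configuration, of setting and reading the value through this constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class NumReducesExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Request four reduce tasks by writing the key directly.
        conf.setInt(MRJobConfig.NUM_REDUCES, 4);
        // Read the value back; the second argument is the default used when the key is unset.
        int reducers = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
        System.out.println("Configured reducers: " + reducers);
    }
}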


Usage

From source file:eastcircle.terasort.TotalOrderPartitioner.java

License:Apache License

/**
 * Read the cut points from the given partition file.
 * @param fs the file system
 * @param p the path to read
 * @param conf the job configuration
 * @return the strings to split the partitions on
 * @throws IOException
 */
private static Text[] readPartitions(FileSystem fs, Path p, Configuration conf) throws IOException {
    int reduces = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
    Text[] result = new Text[reduces - 1];
    DataInputStream reader = fs.open(p);
    for (int i = 0; i < reduces - 1; ++i) {
        result[i] = new Text();
        result[i].readFields(reader);
    }
    reader.close();
    return result;
}
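
For N reducers the partition file holds N - 1 cut points, one boundary key between each pair of adjacent reducers, which is why the loop above reads reduces - 1 Text values. A possible write-side counterpart (a hypothetical helper, not part of the source file above) that would produce a file this method can read back might look like this:

private static void writePartitions(FileSystem fs, Path p, Text[] cutPoints) throws IOException {
    // Write the boundary keys in the same order readPartitions() expects to read them.
    FSDataOutputStream writer = fs.create(p);
    try {
        for (Text cutPoint : cutPoints) {
            cutPoint.write(writer);
        }
    } finally {
        writer.close();
    }
}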

From source file:eastcircle.terasort.TotalOrderPartitioner.java

License:Apache License

private void writeObject(ObjectOutputStream out) throws IOException {
    out.writeUTF(conf.get("fs.defaultFS"));
    out.writeInt(conf.getInt(MRJobConfig.NUM_REDUCES, 2));
    /**
     * Instead of serializing the trie,
     *  we serialize the filename containing sampling points
     *  so that we can rebuild the trie in each task.
     */
    out.writeUTF(this.partFile.toString());
}

From source file:eastcircle.terasort.TotalOrderPartitioner.java

License:Apache License

private void readObject(ObjectInputStream in) throws IOException {
    this.conf = new Configuration();
    conf.set("fs.defaultFS", (String) in.readUTF());
    conf.setInt(MRJobConfig.NUM_REDUCES, (int) in.readInt());
    this.partFile = new Path((String) in.readUTF());
    this.trie = buildTrieFromHDFS(conf, partFile);
}
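
Taken together, writeObject and readObject are this partitioner's custom Java serialization hooks: only fs.defaultFS, the configured reducer count, and the partition-file path travel with the object, and each deserializing task rebuilds the trie from the file on HDFS rather than receiving it over the wire.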

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOTest.java

License:Apache License

private static Configuration loadTestConfiguration(Class<?> outputFormatClassName, Class<?> keyClass,
        Class<?> valueClass) {
    Configuration conf = new Configuration();
    conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
    conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
    conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
    conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
    conf.set(MRJobConfig.ID, String.valueOf(1));
    return conf;
}
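
This helper assembles the Hadoop settings the test relies on: the OutputFormat class, the output key and value classes, the reducer count under MRJobConfig.NUM_REDUCES, and a job id under MRJobConfig.ID.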

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Returns the number of reducers specified under the key {@link MRJobConfig#NUM_REDUCES} in the Hadoop
 * {@link Configuration}.
 *
 * @param conf hadoop {@link Configuration}
 * @return configured count of reducers
 */
static int getReducersCount(Configuration conf) {
    return conf.getInt(MRJobConfig.NUM_REDUCES, DEFAULT_NUM_REDUCERS);
}
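
A minimal usage sketch, assuming a caller in the same package (the method is package-private) and that DEFAULT_NUM_REDUCERS is the fallback constant defined alongside it:

Configuration conf = new Configuration();
int fallback = HadoopFormats.getReducersCount(conf);   // key unset: returns DEFAULT_NUM_REDUCERS
conf.setInt(MRJobConfig.NUM_REDUCES, 5);
int configured = HadoopFormats.getReducersCount(conf); // returns 5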

From source file:org.apache.druid.indexer.IndexGeneratorJobTest.java

License:Apache License

@Before
public void setUp() throws Exception {
    mapper = HadoopDruidIndexerConfig.JSON_MAPPER;
    mapper.registerSubtypes(new NamedType(HashBasedNumberedShardSpec.class, "hashed"));
    mapper.registerSubtypes(new NamedType(SingleDimensionShardSpec.class, "single"));

    dataFile = temporaryFolder.newFile();
    tmpDir = temporaryFolder.newFolder();

    HashMap<String, Object> inputSpec = new HashMap<String, Object>();
    inputSpec.put("paths", dataFile.getCanonicalPath());
    inputSpec.put("type", "static");
    if (inputFormatName != null) {
        inputSpec.put("inputFormat", inputFormatName);
    }

    if (SequenceFileInputFormat.class.getName().equals(inputFormatName)) {
        writeDataToLocalSequenceFile(dataFile, data);
    } else {
        FileUtils.writeLines(dataFile, data);
    }

    config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(
            new DataSchema(datasourceName, mapper.convertValue(inputRowParser, Map.class), aggs,
                    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE,
                            ImmutableList.of(this.interval)),
                    null, mapper),
            new HadoopIOConfig(ImmutableMap.copyOf(inputSpec), null, tmpDir.getCanonicalPath()),
            new HadoopTuningConfig(tmpDir.getCanonicalPath(), null, null, null, null, null, maxRowsInMemory,
                    maxBytesInMemory, true, false, false, false, ImmutableMap.of(MRJobConfig.NUM_REDUCES, "0"), //verifies that set num reducers is ignored
                    false, useCombiner, null, true, null, forceExtendableShardSpecs, false, null, null, null,
                    null)));

    config.setShardSpecs(loadShardSpecs(partitionType, shardInfoForEachSegment));
    config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
}
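
Here MRJobConfig.NUM_REDUCES appears only as a user-supplied job property, set to "0" inside HadoopTuningConfig; as the inline comment notes, the test verifies that such an explicitly configured reducer count is ignored by the indexing job.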

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License

public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {

        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(
                    Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}
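
In this example MRJobConfig.NUM_REDUCES is written into each per-stage JobConf (numIReducer for the intermediate reduce stages, numReducer for the final one) before MRHelpers.translateMRConfToTez() converts the MapReduce settings for Tez; the actual vertex parallelism is still passed explicitly to Vertex.create() with the same values.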