List of usage examples for org.apache.hadoop.mapreduce.MRJobConfig.NUM_REDUCES
public static final String NUM_REDUCES = "mapreduce.job.reduces";
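The constant names the reducer-count property of a job, so reading it through a plain Configuration is interchangeable with the Job API. A minimal sketch of the round trip (the class and job names are illustrative, not taken from any source file below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class NumReducesDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "num-reduces-demo"); // hypothetical job name
        job.setNumReduceTasks(4); // writes MRJobConfig.NUM_REDUCES into the job's conf
        // Both reads observe the same underlying property.
        System.out.println(job.getConfiguration().getInt(MRJobConfig.NUM_REDUCES, 1)); // 4
        System.out.println(job.getNumReduceTasks());                                   // 4
    }
}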
From source file:eastcircle.terasort.TotalOrderPartitioner.java
License:Apache License
/**
 * Read the cut points from the given sequence file.
 * @param fs the file system
 * @param p the path to read
 * @param conf the job configuration
 * @return the strings to split the partitions on
 * @throws IOException
 */
private static Text[] readPartitions(FileSystem fs, Path p, Configuration conf) throws IOException {
    int reduces = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
    Text[] result = new Text[reduces - 1];
    DataInputStream reader = fs.open(p);
    for (int i = 0; i < reduces - 1; ++i) {
        result[i] = new Text();
        result[i].readFields(reader);
    }
    reader.close();
    return result;
}
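The partition file read above is a raw stream of serialized Text cut points, one fewer than the reducer count. For context, a hedged sketch of the matching writer side (writePartitions is a hypothetical helper, not part of the original source):

// Hypothetical counterpart to readPartitions: writes (reduces - 1) cut points
// with Text.write, the pair operation of the readFields call above.
private static void writePartitions(FileSystem fs, Path p, Text[] cutPoints) throws IOException {
    FSDataOutputStream out = fs.create(p, true); // overwrite if present
    for (Text cut : cutPoints) {
        cut.write(out);
    }
    out.close();
}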
From source file:eastcircle.terasort.TotalOrderPartitioner.java
License:Apache License
private void writeObject(ObjectOutputStream out) throws IOException {
    out.writeUTF(conf.get("fs.defaultFS"));
    out.writeInt(conf.getInt(MRJobConfig.NUM_REDUCES, 2));
    /*
     * Instead of serializing the trie,
     * we serialize the filename containing sampling points
     * so that we can rebuild the trie in each task.
     */
    out.writeUTF(this.partFile.toString());
}
From source file:eastcircle.terasort.TotalOrderPartitioner.java
License:Apache License
private void readObject(ObjectInputStream in) throws IOException {
    this.conf = new Configuration();
    conf.set("fs.defaultFS", in.readUTF());
    conf.setInt(MRJobConfig.NUM_REDUCES, in.readInt());
    this.partFile = new Path(in.readUTF());
    this.trie = buildTrieFromHDFS(conf, partFile);
}
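Note that readObject mirrors writeObject field for field: the default filesystem, the reducer count under MRJobConfig.NUM_REDUCES, and the partition-file path come back in the order they were written, and the trie is then rebuilt from HDFS in each task rather than travelling in serialized form.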
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOTest.java
License:Apache License
private static Configuration loadTestConfiguration(Class<?> outputFormatClassName, Class<?> keyClass,
        Class<?> valueClass) {
    Configuration conf = new Configuration();
    conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
    conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
    conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
    conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
    conf.set(MRJobConfig.ID, String.valueOf(1));
    return conf;
}
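For context, a Configuration shaped like this (output format, key/value classes, reducer count, and job id) is what Beam's HadoopFormatIO.Write consumes. A hedged wiring sketch follows; the builder calls are as documented for Beam's hadoop-format module, but the pipeline contents, output format, and locks path are illustrative assumptions, so verify against your Beam version:

// Illustrative only: feeds a conf like the one built above into the sink.
Configuration conf = loadTestConfiguration(SequenceFileOutputFormat.class, Text.class, LongWritable.class);

Pipeline p = Pipeline.create();
p.apply(Create.of(KV.of(new Text("k"), new LongWritable(1L)))
        .withCoder(KvCoder.of(WritableCoder.of(Text.class), WritableCoder.of(LongWritable.class))))
 .apply("WriteViaHadoopFormat",
        HadoopFormatIO.<Text, LongWritable>write()
                .withConfiguration(conf)
                .withPartitioning()
                .withExternalSynchronization(new HDFSSynchronization("/tmp/locks"))); // path is an assumption
p.run().waitUntilFinish();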
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java
License:Apache License
/**
 * Returns the count of reducers specified under key {@link MRJobConfig#NUM_REDUCES} in the
 * hadoop {@link Configuration}.
 *
 * @param conf hadoop {@link Configuration}
 * @return configured count of reducers
 */
static int getReducersCount(Configuration conf) {
    return conf.getInt(MRJobConfig.NUM_REDUCES, DEFAULT_NUM_REDUCERS);
}
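A quick sketch of the fallback behavior: with an empty Configuration (constructed with loadDefaults=false so mapred-default.xml does not supply a value), getInt falls back to the supplied default, which is the role DEFAULT_NUM_REDUCERS plays above (its actual value lives in HadoopFormats; the 1 below is an assumption for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class ReducersCountDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip default resources
        System.out.println(conf.getInt(MRJobConfig.NUM_REDUCES, 1)); // key absent -> 1
        conf.setInt(MRJobConfig.NUM_REDUCES, 8);
        System.out.println(conf.getInt(MRJobConfig.NUM_REDUCES, 1)); // -> 8
    }
}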
From source file:org.apache.druid.indexer.IndexGeneratorJobTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    mapper = HadoopDruidIndexerConfig.JSON_MAPPER;
    mapper.registerSubtypes(new NamedType(HashBasedNumberedShardSpec.class, "hashed"));
    mapper.registerSubtypes(new NamedType(SingleDimensionShardSpec.class, "single"));

    dataFile = temporaryFolder.newFile();
    tmpDir = temporaryFolder.newFolder();

    HashMap<String, Object> inputSpec = new HashMap<String, Object>();
    inputSpec.put("paths", dataFile.getCanonicalPath());
    inputSpec.put("type", "static");
    if (inputFormatName != null) {
        inputSpec.put("inputFormat", inputFormatName);
    }

    if (SequenceFileInputFormat.class.getName().equals(inputFormatName)) {
        writeDataToLocalSequenceFile(dataFile, data);
    } else {
        FileUtils.writeLines(dataFile, data);
    }

    config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(
            new DataSchema(datasourceName, mapper.convertValue(inputRowParser, Map.class), aggs,
                    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE,
                            ImmutableList.of(this.interval)),
                    null, mapper),
            new HadoopIOConfig(ImmutableMap.copyOf(inputSpec), null, tmpDir.getCanonicalPath()),
            new HadoopTuningConfig(tmpDir.getCanonicalPath(), null, null, null, null, null, maxRowsInMemory,
                    maxBytesInMemory, true, false, false, false,
                    ImmutableMap.of(MRJobConfig.NUM_REDUCES, "0"), // verifies that the set num reducers is ignored
                    false, useCombiner, null, true, null, forceExtendableShardSpecs, false, null, null, null,
                    null)));
    config.setShardSpecs(loadShardSpecs(partitionType, shardInfoForEachSegment));
    config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
}
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {
        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map-only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(
                    Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}