List of usage examples for org.apache.hadoop.mapred.JobConf.setBoolean
public void setBoolean(String name, boolean value)
Set the value of the name property to a boolean.
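Before the full examples, here is a minimal, self-contained sketch of the call itself. The property key "my.feature.enable" is a made-up placeholder for illustration, not a real Hadoop configuration key:

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // store a boolean under a property name ("my.feature.enable" is hypothetical)
        conf.setBoolean("my.feature.enable", true);
        // getBoolean returns the stored value, or the supplied default if the key is unset
        System.out.println(conf.getBoolean("my.feature.enable", false)); // prints true
    }
}

As the examples below show, setBoolean is typically used to toggle framework behavior (compression, map-side processing, shuffle options) on a job configuration before the job is submitted.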
From source file:org.apache.sysml.runtime.transform.ApplyTfCSVMR.java
License:Apache License
public static JobReturn runJob(String inputPath, String spec, String mapsPath, String tmpPath,
        String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties,
        long numCols, int replication, String headerLine)
        throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(ApplyTfCSVMR.class);
    job.setJobName("ApplyTfCSV");

    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfCSVMR.class);

    // set relevant classes
    job.setMapperClass(ApplyTfCSVMapper.class);
    job.setNumReduceTasks(0);

    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);

    Path cachefile = new Path(partOffsetsFile);
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // delete outputPath, if it exists already
    Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC, spec);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);

    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    // Since transform CSV produces part files w/ prefix transform-part-*,
    // delete all the "default" part-..... files
    deletePartFiles(fs, outPath);

    MatrixCharacteristics mc = new MatrixCharacteristics();
    return new JobReturn(new MatrixCharacteristics[] { mc }, runjob.isSuccessful());
}
From source file:org.apache.sysml.runtime.transform.GenTfMtdMR.java
License:Apache License
public static long runJob(String inputPath, String txMtdPath, String specWithIDs, String smallestFile,
        String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols,
        int replication, String headerLine)
        throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(GenTfMtdMR.class);
    job.setJobName("GenTfMTD");

    /* Setup MapReduce Job */
    job.setJarByClass(GenTfMtdMR.class);

    // set relevant classes
    job.setMapperClass(GTFMTDMapper.class);
    job.setReducerClass(GTFMTDReducer.class);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DistinctValue.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // delete outputPath, if it exists already
    Path outPath = new Path(txMtdPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC, specWithIDs);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, smallestFile);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, txMtdPath);

    // offsets file to store part-file names and offsets for each input split
    job.set(MRJobConfiguration.TF_OFFSETS_FILE, partOffsetsFile);

    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    Counters c = runjob.getCounters();
    long tx_numRows = c.findCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS)
            .getCounter();

    return tx_numRows;
}
From source file:org.apache.tez.mapreduce.hadoop.TestConfigTranslationMRToTez.java
License:Apache License
@Test(timeout = 5000)
public void testMRToTezKeyTranslation() {
    JobConf confVertex1 = new JobConf();
    confVertex1.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    confVertex1.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, LongWritable.class.getName());
    confVertex1.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

    MRHelpers.translateMRConfToTez(confVertex1);

    // Verify translation
    assertEquals(IntWritable.class.getName(), ConfigUtils.getIntermediateOutputKeyClass(confVertex1).getName());
    assertEquals(LongWritable.class.getName(), ConfigUtils.getIntermediateOutputValueClass(confVertex1).getName());
    assertEquals(IntWritable.class.getName(), ConfigUtils.getIntermediateInputKeyClass(confVertex1).getName());
    assertEquals(LongWritable.class.getName(), ConfigUtils.getIntermediateInputValueClass(confVertex1).getName());
    assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1));
    assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1));
}
From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java
License:Apache License
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
    JobConf jobConf = new JobConf();
    jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
    jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l);
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
    jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
    jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
    jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
    assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
    assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
    assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(0.33f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
}
From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java
License:Apache License
/**
 * Set of keys that can be overridden at tez runtime.
 */
@Test(timeout = 5000)
public void verifyTezOverridenKeys() {
    JobConf jobConf = new JobConf();
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setInt(MRJobConfig.IO_SORT_MB, 100);
    jobConf.setInt(MRJobConfig.COUNTERS_MAX_KEY, 100);

    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 1000);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 200);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 20);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT, 0.2f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 20);
    jobConf.setInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 10.0f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 10.0f);
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, "DefaultSorter");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, "groupComparator");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, "SecondaryComparator");
    jobConf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(1000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(200, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 100));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, false));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 0));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 0));
    assertEquals(10, jobConf.getInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, false));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, false));
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0.0f), 0.0f);
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertEquals("groupComparator", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, ""));
    assertEquals("SecondaryComparator", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, ""));
    assertTrue(jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false));

    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD));
    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD_BYTES));
    assertNull(jobConf.get(MRJobConfig.RECORDS_BEFORE_PROGRESS));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_FACTOR));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_MB));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.INDEX_CACHE_MEMORY_LIMIT));
    assertNull(jobConf.get(MRJobConfig.MAP_COMBINE_MIN_SPILLS));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_PARALLEL_COPIES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_FETCH_FAILURES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_NOTIFY_READERROR));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRConfig.SHUFFLE_SSL_ENABLED_KEY));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_ENABLED));
    assertNull(jobConf.get(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get("map.sort.class"));
}
From source file:org.apache.tez.mapreduce.output.TestMROutputLegacy.java
License:Apache License
@Test(timeout = 5000)
public void testOldAPI_MR() throws Exception {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    // the output is attached to reducer
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od,
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(false, output.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
From source file:org.apache.tez.mapreduce.output.TestMROutputLegacy.java
License:Apache License
@Test(timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    // the output is attached to mapper
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od,
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(false, output.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
From source file:org.apache.tez.mapreduce.output.TestMultiMROutput.java
License:Apache License
private MultiMROutput createMROutputs(Class outputFormat, boolean isMapper, boolean useLazyOutputFormat)
        throws InterruptedException, IOException {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, isMapper);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    DataSinkDescriptor dataSink = MultiMROutput
            .createConfigBuilder(conf, outputFormat, outputPath, useLazyOutputFormat).build();
    OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
    MultiMROutput output = new MultiMROutput(outputContext, 2);
    output.initialize();
    return output;
}
From source file:org.apache.tez.mapreduce.processor.map.MapProcessor.java
License:Apache License
@Override
public void localizeConfiguration(JobConf jobConf) throws IOException, InterruptedException {
    super.localizeConfiguration(jobConf);
    jobConf.setBoolean(JobContext.TASK_ISMAP, true);
}
From source file:org.apache.tez.mapreduce.processor.map.TestMapProcessor.java
License:Apache License
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());

    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
            InputDescriptor.create(MRInputLegacy.class.getName())
                    .setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto
                            .newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                            .build().toByteArray()))),
            1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
            OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
            new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
            Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec));

    task.initialize();
    task.run();
    task.close();

    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier());

    // TODO NEWTEZ FIXME OutputCommitter verification
    // MRTask mrTask = (MRTask)t.getProcessor();
    // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    //     .getCommitter().getClass().getName());
    // t.close();

    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        // verify that keys come out in sorted order
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
        }
        prev = key.get();
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}