Example usage for org.apache.hadoop.mapred JobConf setBoolean

List of usage examples for org.apache.hadoop.mapred JobConf setBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Source Link

Document

Set the value of the name property to a boolean.
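
A minimal sketch of the call itself, assuming only the standard Configuration contract (the property key below is hypothetical, used purely for illustration): setBoolean stores the value under the given key, and the matching getBoolean reads it back, returning the supplied default when the key is unset.

    JobConf conf = new JobConf();

    // "example.feature.enabled" is a hypothetical key, used only for illustration
    conf.setBoolean("example.feature.enabled", true);

    // the second argument is the default returned when the key has not been set
    boolean enabled = conf.getBoolean("example.feature.enabled", false); // true

The examples below all follow this pattern: the boolean is written into the job configuration and read back later, typically on the cluster side, by whatever component knows the key.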

Usage

From source file:org.apache.sysml.runtime.transform.ApplyTfCSVMR.java

License:Apache License

public static JobReturn runJob(String inputPath, String spec, String mapsPath, String tmpPath,
        String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols,
        int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(ApplyTfCSVMR.class);
    job.setJobName("ApplyTfCSV");

    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfCSVMR.class);

    // set relevant classes
    job.setMapperClass(ApplyTfCSVMapper.class);
    job.setNumReduceTasks(0);

    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);

    Path cachefile = new Path(partOffsetsFile);
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    // delete outputPath if it already exists
    Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC, spec);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    // Since transform CSV produces part files w/ prefix transform-part-*,
    // delete all the "default" part-..... files
    deletePartFiles(fs, outPath);

    MatrixCharacteristics mc = new MatrixCharacteristics();
    return new JobReturn(new MatrixCharacteristics[] { mc }, runjob.isSuccessful());
}

From source file:org.apache.sysml.runtime.transform.GenTfMtdMR.java

License:Apache License

public static long runJob(String inputPath, String txMtdPath, String specWithIDs, String smallestFile,
        String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols, int replication,
        String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(GenTfMtdMR.class);
    job.setJobName("GenTfMTD");

    /* Setup MapReduce Job */
    job.setJarByClass(GenTfMtdMR.class);

    // set relevant classes
    job.setMapperClass(GTFMTDMapper.class);
    job.setReducerClass(GTFMTDReducer.class);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DistinctValue.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    // delete outputPath if it already exists
    Path outPath = new Path(txMtdPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC, specWithIDs);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, smallestFile);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);

    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, txMtdPath);

    // offsets file to store part-file names and offsets for each input split
    job.set(MRJobConfiguration.TF_OFFSETS_FILE, partOffsetsFile);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    Counters c = runjob.getCounters();
    long tx_numRows = c.findCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS).getCounter();

    return tx_numRows;
}

From source file:org.apache.tez.mapreduce.hadoop.TestConfigTranslationMRToTez.java

License:Apache License

@Test(timeout = 5000)
public void testMRToTezKeyTranslation() {
    JobConf confVertex1 = new JobConf();
    confVertex1.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    confVertex1.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, LongWritable.class.getName());
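    // boolean key under translation: enable map output compression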
    confVertex1.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

    MRHelpers.translateMRConfToTez(confVertex1);

    // Verify translation
    assertEquals(IntWritable.class.getName(), ConfigUtils.getIntermediateOutputKeyClass(confVertex1).getName());
    assertEquals(LongWritable.class.getName(),
            ConfigUtils.getIntermediateOutputValueClass(confVertex1).getName());
    assertEquals(IntWritable.class.getName(), ConfigUtils.getIntermediateInputKeyClass(confVertex1).getName());
    assertEquals(LongWritable.class.getName(),
            ConfigUtils.getIntermediateInputValueClass(confVertex1).getName());
    assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1));
    assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1));
}

From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java

License:Apache License

@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
    JobConf jobConf = new JobConf();

    jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
    jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
    jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
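    // boolean key under test: enable the reducer's in-memory (memToMem) merge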
    jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
    jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f),
            0.01f);
    assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0),
            0.01f);
    assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
            0.01f);
    assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(0.33f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
}

From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java

License:Apache License

@Test(timeout = 5000)
/**
 * Set of keys that can be overridden at Tez runtime
 */
public void verifyTezOverridenKeys() {
    JobConf jobConf = new JobConf();
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setInt(MRJobConfig.IO_SORT_MB, 100);
    jobConf.setInt(MRJobConfig.COUNTERS_MAX_KEY, 100);

    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 1000);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 200);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 20);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT, 0.2f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 20);
    jobConf.setInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 10.0f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 10.0f);
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, "DefaultSorter");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, "groupComparator");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, "SecondaryComparator");

    jobConf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(1000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(200, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 100));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, false));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 0));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 0));
    assertEquals(10, jobConf.getInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, false));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, false));
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0.0f),
            0.0f);
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertEquals("groupComparator",
            jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, ""));
    assertEquals("SecondaryComparator",
            jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, ""));
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertTrue(jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false));

    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD));
    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD_BYTES));
    assertNull(jobConf.get(MRJobConfig.RECORDS_BEFORE_PROGRESS));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_FACTOR));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_MB));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.INDEX_CACHE_MEMORY_LIMIT));
    assertNull(jobConf.get(MRJobConfig.MAP_COMBINE_MIN_SPILLS));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_PARALLEL_COPIES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_FETCH_FAILURES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_NOTIFY_READERROR));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRConfig.SHUFFLE_SSL_ENABLED_KEY));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_ENABLED));
    assertNull(jobConf.get(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get("map.sort.class"));
}

From source file:org.apache.tez.mapreduce.output.TestMROutputLegacy.java

License:Apache License

@Test(timeout = 5000)
public void testOldAPI_MR() throws Exception {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    // the output is attached to reducer
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od,
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(false, output.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}

From source file:org.apache.tez.mapreduce.output.TestMROutputLegacy.java

License:Apache License

@Test(timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    // the output is attached to mapper
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od,
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(false, output.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}

From source file:org.apache.tez.mapreduce.output.TestMultiMROutput.java

License:Apache License

private MultiMROutput createMROutputs(Class outputFormat, boolean isMapper, boolean useLazyOutputFormat)
        throws InterruptedException, IOException {
    String outputPath = "/tmp/output";
    JobConf conf = new JobConf();
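    // route the output to a map-stage or reduce-stage processor, per the isMapper flag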
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, isMapper);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    DataSinkDescriptor dataSink = MultiMROutput
            .createConfigBuilder(conf, outputFormat, outputPath, useLazyOutputFormat).build();

    OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
    MultiMROutput output = new MultiMROutput(outputContext, 2);
    output.initialize();
    return output;
}

From source file:org.apache.tez.mapreduce.processor.map.MapProcessor.java

License:Apache License

@Override
public void localizeConfiguration(JobConf jobConf) throws IOException, InterruptedException {
    super.localizeConfiguration(jobConf);
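    // flag the localized task configuration as belonging to a map task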
    jobConf.setBoolean(JobContext.TASK_ISMAP, true);
}

From source file:org.apache.tez.mapreduce.processor.map.TestMapProcessor.java

License:Apache License

@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

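    // disable split delivery via Tez events so the task reads the pre-generated split from disk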
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());

    Path mapInput = new Path(workDir, "map0");

    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
            InputDescriptor.create(MRInputLegacy.class.getName())
                    .setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto
                            .newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                            .build().toByteArray()))),
            1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
            OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
            new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
            Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec));

    task.initialize();
    task.run();
    task.close();

    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier());

    // TODO NEWTEZ FIXME OutputCommitter verification
    //    MRTask mrTask = (MRTask)t.getProcessor();
    //    Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    //        .getCommitter().getClass().getName());
    //    t.close();

    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
        }
        // update prev unconditionally so the ordering check runs after the first record
        prev = key.get();
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}