org.apache.tez.mapreduce.output.TestMROutputLegacy.java Source code

Introduction

Here is the source code for org.apache.tez.mapreduce.output.TestMROutputLegacy.java. This JUnit test verifies that MROutputLegacy initializes correctly from a serialized MapReduce configuration, covering both the old (mapred) and new (mapreduce) APIs, with the output attached either to a reducer or to a map-only stage.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tez.mapreduce.output;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.common.TezUtils;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.DataSinkDescriptor;
import org.apache.tez.dag.api.OutputCommitterDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.mapreduce.committer.MROutputCommitter;
import org.apache.tez.mapreduce.hadoop.MRConfig;
import org.apache.tez.runtime.api.OutputContext;
import org.junit.Test;

public class TestMROutputLegacy {

    // simulate the behavior of translating MR to DAG using MR old API
    @Test(timeout = 5000)
    public void testOldAPI_MR() throws Exception {
        String outputPath = "/tmp/output";
        JobConf conf = new JobConf();
        conf.setOutputKeyClass(NullWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
        org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
        // the output is attached to reducer
        conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
        UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
        OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
        DataSinkDescriptor sink = DataSinkDescriptor.create(od,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

        OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
        MROutputLegacy output = new MROutputLegacy(outputContext, 2);
        output.initialize();
        assertEquals(false, output.useNewApi);
        assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
        assertNull(output.newOutputFormat);
        assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
        assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
        assertNull(output.newApiTaskAttemptContext);
        assertNotNull(output.oldRecordWriter);
        assertNull(output.newRecordWriter);
        assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
    }

    // simulate the behavior of translating MR to DAG using MR new API
    @Test(timeout = 5000)
    public void testNewAPI_MR() throws Exception {
        String outputPath = "/tmp/output";
        Job job = Job.getInstance();
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
        job.getConfiguration().setBoolean("mapred.reducer.new-api", true);
        // the output is attached to reducer
        job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
        UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
        OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
        DataSinkDescriptor sink = DataSinkDescriptor.create(od,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

        OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
        MROutputLegacy output = new MROutputLegacy(outputContext, 2);
        output.initialize();
        assertEquals(true, output.useNewApi);
        assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
        assertNull(output.oldOutputFormat);
        assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
        assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
        assertNull(output.oldApiTaskAttemptContext);
        assertNotNull(output.newRecordWriter);
        assertNull(output.oldRecordWriter);
        assertEquals(FileOutputCommitter.class, output.committer.getClass());
    }

    // simulate the behavior of translating Mapper-only job to DAG using MR old API
    @Test(timeout = 5000)
    public void testOldAPI_MapperOnly() throws Exception {
        String outputPath = "/tmp/output";
        JobConf conf = new JobConf();
        conf.setOutputKeyClass(NullWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
        org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
        // the output is attached to mapper
        conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
        UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
        OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
        DataSinkDescriptor sink = DataSinkDescriptor.create(od,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

        OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
        MROutputLegacy output = new MROutputLegacy(outputContext, 2);
        output.initialize();
        assertEquals(false, output.useNewApi);
        assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
        assertNull(output.newOutputFormat);
        assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
        assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
        assertNull(output.newApiTaskAttemptContext);
        assertNotNull(output.oldRecordWriter);
        assertNull(output.newRecordWriter);
        assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
    }

    // simulate the behavior of translating Mapper-only job to DAG using MR new API
    @Test(timeout = 5000)
    public void testNewAPI_MapperOnly() throws Exception {
        String outputPath = "/tmp/output";
        Job job = Job.getInstance();
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
        job.getConfiguration().setBoolean("mapred.mapper.new-api", true);
        // the output is attached to mapper
        job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
        UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
        OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
        DataSinkDescriptor sink = DataSinkDescriptor.create(od,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

        OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
        MROutputLegacy output = new MROutputLegacy(outputContext, 2);
        output.initialize();
        assertEquals(true, output.useNewApi);
        assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
        assertNull(output.oldOutputFormat);
        assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
        assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
        assertNull(output.oldApiTaskAttemptContext);
        assertNotNull(output.newRecordWriter);
        assertNull(output.oldRecordWriter);
        assertEquals(FileOutputCommitter.class, output.committer.getClass());
    }

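    /**
     * Builds a mocked OutputContext that supplies the serialized configuration
     * payload plus the minimal task identity and counters that
     * MROutputLegacy.initialize() reads during these tests.
     */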
    private OutputContext createMockOutputContext(UserPayload payload) {
        OutputContext outputContext = mock(OutputContext.class);
        ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
        when(outputContext.getUserPayload()).thenReturn(payload);
        when(outputContext.getApplicationId()).thenReturn(appId);
        when(outputContext.getTaskVertexIndex()).thenReturn(1);
        when(outputContext.getTaskAttemptNumber()).thenReturn(1);
        when(outputContext.getCounters()).thenReturn(new TezCounters());
        return outputContext;
    }
}
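
Example usage

The test above only exercises MROutputLegacy.initialize() against a mocked OutputContext. As a point of reference, the sketch below shows how the same DataSinkDescriptor built in the tests could be attached to a vertex in a real Tez DAG. This is an illustrative sketch, not part of the original test: the processor class name, vertex name, and parallelism are assumptions chosen for the example.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.DataSinkDescriptor;
import org.apache.tez.dag.api.OutputCommitterDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.mapreduce.committer.MROutputCommitter;
import org.apache.tez.mapreduce.output.MROutputLegacy;

public class MROutputLegacySinkExample {

    public static DAG buildDag() throws Exception {
        // Configure the old-API output format exactly as testOldAPI_MR does.
        JobConf conf = new JobConf();
        conf.setOutputKeyClass(NullWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
        org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path("/tmp/output"));

        // Serialize the configuration into the user payload consumed by MROutputLegacy.
        UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
        OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
                .setUserPayload(payload);
        DataSinkDescriptor sink = DataSinkDescriptor.create(od,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

        // The processor class name, vertex name, and parallelism of 2 are
        // illustrative placeholders, not values taken from the original test.
        Vertex reducer = Vertex.create("reducer",
                ProcessorDescriptor.create("org.apache.tez.mapreduce.processor.reduce.ReduceProcessor"), 2);
        reducer.addDataSink("MROutput", sink);

        return DAG.create("example-dag").addVertex(reducer);
    }
}

At runtime, Tez instantiates MROutputLegacy on each task of the vertex and hands it the serialized JobConf through OutputContext.getUserPayload(), which is exactly the path the mocked context in the test simulates.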