org.apache.tez.mapreduce.committer.MROutputCommitter.java Source code

Introduction

Here is the source code for org.apache.tez.mapreduce.committer.MROutputCommitter.java, the Tez output committer that adapts Hadoop MapReduce OutputCommitter implementations (old and new API) for MapReduce-compatible data sinks.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.mapreduce.committer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.client.VertexStatus;
import org.apache.tez.mapreduce.hadoop.MRConfig;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.runtime.api.OutputCommitter;
import org.apache.tez.runtime.api.OutputCommitterContext;

import java.io.IOException;

/**
 * Implements the {@link OutputCommitter} and provides MapReduce-compatible
 * output commit operations for MapReduce-compatible data sinks.
 */
@Public
public class MROutputCommitter extends OutputCommitter {

    private static final Log LOG = LogFactory.getLog(MROutputCommitter.class);

    private org.apache.hadoop.mapreduce.OutputCommitter committer = null;
    private JobContext jobContext = null;
    private volatile boolean initialized = false;
    private JobConf jobConf = null;
    private boolean newApiCommitter;

    public MROutputCommitter(OutputCommitterContext committerContext) {
        super(committerContext);
    }

    @Override
    public void initialize() throws IOException {
        UserPayload userPayload = getContext().getOutputUserPayload();
        if (!userPayload.hasPayload()) {
            jobConf = new JobConf();
        } else {
            jobConf = new JobConf(TezUtils.createConfFromUserPayload(userPayload));
        }

        // Read all credentials into the credentials instance stored in JobConf.
        jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
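        // Expose the DAG attempt number as the MR application attempt id;
        // committers such as FileOutputCommitter consult it during recovery.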
        jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
        committer = getOutputCommitter(getContext());
        jobContext = getJobContextFromVertexContext(getContext());
        initialized = true;
    }

    @Override
    public void setupOutput() throws IOException {
        if (!initialized) {
            throw new RuntimeException("Committer not initialized");
        }
        committer.setupJob(jobContext);
    }

    @Override
    public void commitOutput() throws IOException {
        if (!initialized) {
            throw new RuntimeException("Committer not initialized");
        }
        committer.commitJob(jobContext);
    }

    @Override
    public void abortOutput(VertexStatus.State finalState) throws IOException {
        if (!initialized) {
            throw new RuntimeException("Committer not initialized");
        }
        JobStatus.State jobState = getJobStateFromVertexStatusState(finalState);
        committer.abortJob(jobContext, jobState);
    }

    @SuppressWarnings("rawtypes")
    private org.apache.hadoop.mapreduce.OutputCommitter getOutputCommitter(OutputCommitterContext context) {

        org.apache.hadoop.mapreduce.OutputCommitter committer = null;
        newApiCommitter = false;
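        // The job used the new (org.apache.hadoop.mapreduce) API if either the
        // mapper or the reducer was configured through it.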
        if (jobConf.getBoolean("mapred.reducer.new-api", false)
                || jobConf.getBoolean("mapred.mapper.new-api", false)) {
            newApiCommitter = true;
            LOG.info("Using mapred newApiCommitter.");
        }

        if (newApiCommitter) {
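            // New API: the committer comes from the configured OutputFormat, so
            // build a synthetic task attempt context just to instantiate it.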
            TaskAttemptID taskAttemptID = new TaskAttemptID(
                    Long.toString(context.getApplicationId().getClusterTimestamp()),
                    context.getApplicationId().getId(),
                    ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ? TaskType.MAP : TaskType.REDUCE)), 0,
                    context.getDAGAttemptNumber());

            TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskAttemptID);
            try {
                OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                        jobConf);
                committer = outputFormat.getOutputCommitter(taskContext);
            } catch (Exception e) {
                throw new TezUncheckedException(e);
            }
        } else {
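            // Old API: instantiate the class named by mapred.output.committer.class,
            // defaulting to FileOutputCommitter.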
            committer = ReflectionUtils.newInstance(jobConf.getClass("mapred.output.committer.class",
                    FileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class), jobConf);
        }
        LOG.info("OutputCommitter for outputName=" + context.getOutputName() + ", vertexName="
                + context.getVertexName() + ", outputCommitterClass=" + committer.getClass().getName());
        return committer;
    }

    // FIXME we are using ApplicationId as DAG id
    private JobContext getJobContextFromVertexContext(OutputCommitterContext context) throws IOException {
        JobID jobId = TypeConverter.fromYarn(context.getApplicationId());
        return new MRJobContextImpl(jobConf, jobId);
    }

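    // Map a vertex's terminal state to the closest MapReduce JobStatus.State so
    // that abortJob() receives a state the MR committer understands.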
    private JobStatus.State getJobStateFromVertexStatusState(VertexStatus.State state) {
        switch (state) {
        case INITED:
            return JobStatus.State.PREP;
        case RUNNING:
            return JobStatus.State.RUNNING;
        case SUCCEEDED:
            return JobStatus.State.SUCCEEDED;
        case KILLED:
            return JobStatus.State.KILLED;
        case FAILED:
        case ERROR:
            return JobStatus.State.FAILED;
        default:
            throw new TezUncheckedException("Unknown VertexStatus.State: " + state);
        }
    }

    private static class MRJobContextImpl extends org.apache.hadoop.mapred.JobContextImpl {

        public MRJobContextImpl(JobConf jobConf, JobID jobId) {
            super(jobConf, jobId);
        }

    }

    @Override
    public boolean isTaskRecoverySupported() {
        if (!initialized) {
            throw new RuntimeException("Committer not initialized");
        }
        return committer.isRecoverySupported();
    }

    @Override
    public void recoverTask(int taskIndex, int attemptId) throws IOException {
        if (!initialized) {
            throw new RuntimeException("Committer not initialized");
        }
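        // Rebuild the task attempt's ID: the vertex index is appended to the
        // cluster timestamp so attempt IDs stay unique across vertices.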
        TaskAttemptID taskAttemptID = new TaskAttemptID(
                Long.toString(getContext().getApplicationId().getClusterTimestamp())
                        + String.valueOf(getContext().getVertexIndex()),
                getContext().getApplicationId().getId(),
                ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ? TaskType.MAP : TaskType.REDUCE)),
                taskIndex, attemptId);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskAttemptID);
        committer.recoverTask(taskContext);
    }

}
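
Usage

This committer is usually not constructed by hand. A common way to wire it into a DAG is through Tez's MROutput config builder, which by default attaches an OutputCommitterDescriptor pointing at MROutputCommitter to the data sink it builds. The sketch below assumes that builder API; the vertex, configuration, output name, and output path are placeholders, not part of the source above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.tez.dag.api.DataSinkDescriptor;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.mapreduce.output.MROutput;

public class MROutputCommitterUsage {

    // Attach a MapReduce-compatible text-file sink to a vertex. MROutput's
    // config builder registers MROutputCommitter as the sink's committer by
    // default, so the vertex output is committed/aborted by the class above.
    static Vertex addTextFileSink(Vertex vertex, Configuration conf, String outputPath) {
        DataSinkDescriptor sink = MROutput.createConfigBuilder(
                new Configuration(conf), TextOutputFormat.class, outputPath).build();
        return vertex.addDataSink("MROutput", sink);
    }
}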