/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.hadoop.v2;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.serializer.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.*;
import org.apache.ignite.*;
import org.apache.ignite.internal.processors.hadoop.*;
import org.apache.ignite.internal.processors.hadoop.counter.*;
import org.apache.ignite.internal.processors.hadoop.fs.*;
import org.apache.ignite.internal.processors.hadoop.v1.*;
import org.apache.ignite.internal.util.typedef.internal.*;
import org.jetbrains.annotations.*;

import java.io.*;
import java.util.*;

import static org.apache.ignite.igfs.hadoop.IgfsHadoopParameters.*;
import static org.apache.ignite.internal.processors.hadoop.GridHadoopUtils.*;

/**
 * Context for task execution.
 */
public class GridHadoopV2TaskContext extends GridHadoopTaskContext {
    /** */
    private static final boolean COMBINE_KEY_GROUPING_SUPPORTED;

    /**
     * Check for combiner grouping support (available since Hadoop 2.3).
     */
    static {
        boolean ok;

        try {
            JobContext.class.getDeclaredMethod("getCombinerKeyGroupingComparator");

            ok = true;
        }
        catch (NoSuchMethodException ignore) {
            ok = false;
        }

        COMBINE_KEY_GROUPING_SUPPORTED = ok;
    }

    /** Flag is set if new context-object code is used for running the mapper. */
    private final boolean useNewMapper;

    /** Flag is set if new context-object code is used for running the reducer. */
    private final boolean useNewReducer;

    /** Flag is set if new context-object code is used for running the combiner. */
    private final boolean useNewCombiner;

    /** */
    private final JobContextImpl jobCtx;

    /** Set if the task was cancelled. */
    private volatile boolean cancelled;

    /** Current task. */
    private volatile GridHadoopTask task;

    /** Local node ID. */
    private UUID locNodeId;

    /** Counters for task. */
    private final GridHadoopCounters cntrs = new GridHadoopCountersImpl();

    /**
     * @param taskInfo Task info.
     * @param job Job.
     * @param jobId Job ID.
     * @param locNodeId Local node ID.
     * @param jobConfDataInput DataInput to read the serialized JobConf from.
     */
    public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
        super(taskInfo, job);

        this.locNodeId = locNodeId;

        // Before creating a JobConf instance we must set the proper context class loader.
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

        try {
            JobConf jobConf = new JobConf();

            try {
                jobConf.readFields(jobConfDataInput);
            }
            catch (IOException e) {
                throw new IgniteCheckedException(e);
            }

            // For map-reduce jobs prefer local writes.
            jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

            jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

            useNewMapper = jobConf.getUseNewMapper();
            useNewReducer = jobConf.getUseNewReducer();

            // JobConf has no old/new API flag for the combiner: fall back to the new API
            // whenever no old-API combiner class is configured.
            useNewCombiner = jobConf.getCombinerClass() == null;
        }
        finally {
            Thread.currentThread().setContextClassLoader(null);
        }
    }
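    /*
     * A note on the class-loading discipline used throughout this class: Hadoop's
     * Configuration captures the thread context class loader and uses it to resolve
     * user classes. Every method here that touches Hadoop classes therefore follows
     * the same pattern:
     *
     *     Thread.currentThread().setContextClassLoader(jobConf().getClassLoader());
     *
     *     try {
     *         // Work with Hadoop classes.
     *     }
     *     finally {
     *         Thread.currentThread().setContextClassLoader(null);
     *     }
     *
     * so the job class loader never leaks into the worker thread once the call returns.
     */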
    /** {@inheritDoc} */
    @Override public <T extends GridHadoopCounter> T counter(String grp, String name, Class<T> cls) {
        return cntrs.counter(grp, name, cls);
    }

    /** {@inheritDoc} */
    @Override public GridHadoopCounters counters() {
        return cntrs;
    }

    /**
     * Creates appropriate task from current task info.
     *
     * @return Task.
     */
    private GridHadoopTask createTask() {
        boolean isAbort = taskInfo().type() == GridHadoopTaskType.ABORT;

        switch (taskInfo().type()) {
            case SETUP:
                return useNewMapper ? new GridHadoopV2SetupTask(taskInfo()) : new GridHadoopV1SetupTask(taskInfo());

            case MAP:
                return useNewMapper ? new GridHadoopV2MapTask(taskInfo()) : new GridHadoopV1MapTask(taskInfo());

            case REDUCE:
                return useNewReducer ? new GridHadoopV2ReduceTask(taskInfo(), true) :
                    new GridHadoopV1ReduceTask(taskInfo(), true);

            case COMBINE:
                return useNewCombiner ? new GridHadoopV2ReduceTask(taskInfo(), false) :
                    new GridHadoopV1ReduceTask(taskInfo(), false);

            case COMMIT:
            case ABORT:
                return useNewReducer ? new GridHadoopV2CleanupTask(taskInfo(), isAbort) :
                    new GridHadoopV1CleanupTask(taskInfo(), isAbort);

            default:
                return null;
        }
    }

    /** {@inheritDoc} */
    @Override public void run() throws IgniteCheckedException {
        try {
            Thread.currentThread().setContextClassLoader(jobConf().getClassLoader());

            try {
                task = createTask();
            }
            catch (Throwable e) {
                throw transformException(e);
            }

            if (cancelled)
                throw new GridHadoopTaskCancelledException("Task cancelled.");

            try {
                task.run(this);
            }
            catch (Throwable e) {
                throw transformException(e);
            }
        }
        finally {
            task = null;

            Thread.currentThread().setContextClassLoader(null);
        }
    }

    /** {@inheritDoc} */
    @Override public void cancel() {
        cancelled = true;

        GridHadoopTask t = task;

        if (t != null)
            t.cancel();
    }
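    /*
     * Cancellation is cooperative: cancel() raises the volatile flag and forwards the
     * request to the task currently running, while run() re-checks the flag right after
     * task creation, so a cancel that arrives before the task starts is not lost.
     * Reading the volatile 'task' field into a local variable in cancel() avoids a race
     * with run() nulling it out in its finally block.
     */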
    /** {@inheritDoc} */
    @Override public void prepareTaskEnvironment() throws IgniteCheckedException {
        File locDir;

        switch (taskInfo().type()) {
            case MAP:
            case REDUCE:
                job().prepareTaskEnvironment(taskInfo());

                locDir = taskLocalDir(locNodeId, taskInfo());

                break;

            default:
                locDir = jobLocalDir(locNodeId, taskInfo().jobId());
        }

        Thread.currentThread().setContextClassLoader(jobConf().getClassLoader());

        try {
            FileSystem fs = FileSystem.get(jobConf());

            GridHadoopFileSystemsUtils.setUser(fs, jobConf().getUser());

            LocalFileSystem locFs = FileSystem.getLocal(jobConf());

            locFs.setWorkingDirectory(new Path(locDir.getAbsolutePath()));
        }
        catch (Throwable e) {
            throw transformException(e);
        }
        finally {
            Thread.currentThread().setContextClassLoader(null);
        }
    }

    /** {@inheritDoc} */
    @Override public void cleanupTaskEnvironment() throws IgniteCheckedException {
        job().cleanupTaskEnvironment(taskInfo());
    }

    /**
     * Creates Hadoop attempt ID.
     *
     * @return Attempt ID.
     */
    public TaskAttemptID attemptId() {
        TaskID tid = new TaskID(jobCtx.getJobID(), taskType(taskInfo().type()), taskInfo().taskNumber());

        return new TaskAttemptID(tid, taskInfo().attempt());
    }

    /**
     * @param type Task type.
     * @return Hadoop task type.
     */
    private TaskType taskType(GridHadoopTaskType type) {
        switch (type) {
            case SETUP:
                return TaskType.JOB_SETUP;

            case MAP:
            case COMBINE:
                return TaskType.MAP;

            case REDUCE:
                return TaskType.REDUCE;

            case COMMIT:
            case ABORT:
                return TaskType.JOB_CLEANUP;

            default:
                return null;
        }
    }

    /**
     * Gets job configuration of the task.
     *
     * @return Job configuration.
     */
    public JobConf jobConf() {
        return jobCtx.getJobConf();
    }

    /**
     * Gets job context of the task.
     *
     * @return Job context.
     */
    public JobContextImpl jobContext() {
        return jobCtx;
    }

    /** {@inheritDoc} */
    @Override public GridHadoopPartitioner partitioner() throws IgniteCheckedException {
        Class<?> partClsOld = jobConf().getClass("mapred.partitioner.class", null);

        if (partClsOld != null)
            return new GridHadoopV1Partitioner(jobConf().getPartitionerClass(), jobConf());

        try {
            return new GridHadoopV2Partitioner(jobCtx.getPartitionerClass(), jobConf());
        }
        catch (ClassNotFoundException e) {
            throw new IgniteCheckedException(e);
        }
    }

    /**
     * Gets serializer for specified class.
     *
     * @param cls Class.
     * @param jobConf Job configuration.
     * @return Appropriate serializer.
     */
    @SuppressWarnings("unchecked")
    private GridHadoopSerialization getSerialization(Class<?> cls, Configuration jobConf) throws IgniteCheckedException {
        A.notNull(cls, "cls");

        SerializationFactory factory = new SerializationFactory(jobConf);

        Serialization<?> serialization = factory.getSerialization(cls);

        if (serialization == null)
            throw new IgniteCheckedException("Failed to find serialization for: " + cls.getName());

        // Writable keys and values get a direct adapter; everything else goes
        // through the generic wrapper over Hadoop's Serialization.
        if (serialization.getClass() == WritableSerialization.class)
            return new GridHadoopWritableSerialization((Class<? extends Writable>)cls);

        return new GridHadoopSerializationWrapper(serialization, cls);
    }

    /** {@inheritDoc} */
    @Override public GridHadoopSerialization keySerialization() throws IgniteCheckedException {
        return getSerialization(jobCtx.getMapOutputKeyClass(), jobConf());
    }

    /** {@inheritDoc} */
    @Override public GridHadoopSerialization valueSerialization() throws IgniteCheckedException {
        return getSerialization(jobCtx.getMapOutputValueClass(), jobConf());
    }

    /** {@inheritDoc} */
    @Override public Comparator<Object> sortComparator() {
        return (Comparator<Object>)jobCtx.getSortComparator();
    }

    /** {@inheritDoc} */
    @Override public Comparator<Object> groupComparator() {
        Comparator<?> res;

        switch (taskInfo().type()) {
            case COMBINE:
                res = COMBINE_KEY_GROUPING_SUPPORTED ?
                    jobContext().getCombinerKeyGroupingComparator() : jobContext().getGroupingComparator();

                break;

            case REDUCE:
                res = jobContext().getGroupingComparator();

                break;

            default:
                return null;
        }

        // A separate grouping comparator is only needed when it differs from the sort comparator.
        if (res != null && res.getClass() != sortComparator().getClass())
            return (Comparator<Object>)res;

        return null;
    }
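    /*
     * Input splits arrive in one of two forms: GridHadoopSplitWrapper carries a serialized
     * copy of the native Hadoop split, while GridHadoopExternalSplit carries only an offset
     * into the job's split file, from which readExternalSplit() below re-reads the split's
     * class name and deserializes the split on demand.
     */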
    /**
     * @param split Split.
     * @return Native Hadoop split.
     * @throws IgniteCheckedException If failed.
     */
    @SuppressWarnings("unchecked")
    public Object getNativeSplit(GridHadoopInputSplit split) throws IgniteCheckedException {
        if (split instanceof GridHadoopExternalSplit)
            return readExternalSplit((GridHadoopExternalSplit)split);

        if (split instanceof GridHadoopSplitWrapper)
            return unwrapSplit((GridHadoopSplitWrapper)split);

        throw new IllegalStateException("Unknown split: " + split);
    }

    /**
     * @param split External split.
     * @return Native input split.
     * @throws IgniteCheckedException If failed.
     */
    @SuppressWarnings("unchecked")
    private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException {
        Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

            in.seek(split.offset());

            String clsName = Text.readString(in);

            Class<?> cls = jobConf().getClassByName(clsName);

            assert cls != null;

            Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

            Deserializer deserializer = serialization.getDeserializer(cls);

            deserializer.open(in);

            Object res = deserializer.deserialize(null);

            deserializer.close();

            assert res != null;

            return res;
        }
        catch (IOException | ClassNotFoundException e) {
            throw new IgniteCheckedException(e);
        }
    }
}
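/*
 * Typical lifecycle, as driven by the task executor (a sketch based only on the API
 * above; the executor itself lives elsewhere in the Hadoop module):
 *
 *     GridHadoopV2TaskContext ctx =
 *         new GridHadoopV2TaskContext(taskInfo, job, jobId, locNodeId, jobConfIn);
 *
 *     ctx.prepareTaskEnvironment();
 *
 *     try {
 *         ctx.run(); // May be interrupted by ctx.cancel() from another thread.
 *     }
 *     finally {
 *         ctx.cleanupTaskEnvironment();
 *     }
 */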