org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java Source code

Introduction

Here is the source code for org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java. HadoopV2TaskContext is the per-task execution context used by Apache Ignite's Hadoop MapReduce accelerator: it deserializes the job's JobConf, creates the appropriate setup, map, combine, reduce or cleanup task for either the old (mapred) or new (mapreduce) Hadoop API, and exposes the job's serializations, comparators, partitioner and file systems to the running task.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.hadoop.impl.v2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.hadoop.io.BytesWritablePartiallyRawComparator;
import org.apache.ignite.hadoop.io.PartiallyRawComparator;
import org.apache.ignite.hadoop.io.TextPartiallyRawComparator;
import org.apache.ignite.internal.processors.hadoop.HadoopCommonUtils;
import org.apache.ignite.internal.processors.hadoop.HadoopExternalSplit;
import org.apache.ignite.hadoop.HadoopInputSplit;
import org.apache.ignite.internal.processors.hadoop.HadoopJobEx;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.processors.hadoop.HadoopJobProperty;
import org.apache.ignite.internal.processors.hadoop.HadoopPartitioner;
import org.apache.ignite.internal.processors.hadoop.HadoopSerialization;
import org.apache.ignite.internal.processors.hadoop.HadoopSplitWrapper;
import org.apache.ignite.internal.processors.hadoop.HadoopTask;
import org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException;
import org.apache.ignite.internal.processors.hadoop.HadoopTaskContext;
import org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo;
import org.apache.ignite.internal.processors.hadoop.HadoopTaskType;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounter;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCountersImpl;
import org.apache.ignite.internal.processors.hadoop.impl.fs.HadoopLazyConcurrentMap;
import org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1CleanupTask;
import org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask;
import org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1Partitioner;
import org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask;
import org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1SetupTask;
import org.apache.ignite.internal.processors.hadoop.io.PartiallyOffheapRawComparatorEx;
import org.apache.ignite.internal.processors.igfs.IgfsUtils;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.internal.A;
import org.jetbrains.annotations.Nullable;

import java.io.DataInput;
import java.io.File;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Callable;

import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.jobLocalDir;
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.taskLocalDir;
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.transformException;
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.unwrapSplit;
import static org.apache.ignite.internal.processors.hadoop.impl.fs.HadoopFileSystemCacheUtils.FsCacheKey;
import static org.apache.ignite.internal.processors.hadoop.impl.fs.HadoopFileSystemCacheUtils.createHadoopLazyConcurrentMap;
import static org.apache.ignite.internal.processors.hadoop.impl.fs.HadoopFileSystemCacheUtils.fileSystemForMrUserWithCaching;
import static org.apache.ignite.internal.processors.hadoop.impl.fs.HadoopParameters.PARAM_IGFS_PREFER_LOCAL_WRITES;

/**
 * Context for task execution.
 */
public class HadoopV2TaskContext extends HadoopTaskContext {
    /** Whether the Hadoop API on the classpath supports a combiner key grouping comparator. */
    private static final boolean COMBINE_KEY_GROUPING_SUPPORTED;

    /** Lazy per-user file system cache used by the Hadoop task. */
    private static final HadoopLazyConcurrentMap<FsCacheKey, FileSystem> fsMap = createHadoopLazyConcurrentMap();

    /** Default partial comparator mappings. */
    private static final Map<String, String> PARTIAL_COMPARATORS = new HashMap<>();

    /**
     * This method is called via reflection upon job finish with the class loader of each task.
     * It cleans up all the file systems created for the specific task.
     * Each class loader uses its own instance of <code>fsMap</code> since the class loaders
     * are different.
     *
     * @throws IgniteCheckedException On error.
     */
    public static void close() throws IgniteCheckedException {
        fsMap.close();
    }

    /** Flag is set if new context-object code is used for running the mapper. */
    private final boolean useNewMapper;

    /** Flag is set if new context-object code is used for running the reducer. */
    private final boolean useNewReducer;

    /** Flag is set if new context-object code is used for running the combiner. */
    private final boolean useNewCombiner;

    /** */
    private final JobContextImpl jobCtx;

    /** Set if the task is being cancelled. */
    private volatile boolean cancelled;

    /** Current task. */
    private volatile HadoopTask task;

    /** Local node ID. */
    private final UUID locNodeId;

    /** Counters for task. */
    private final HadoopCounters cntrs = new HadoopCountersImpl();

    static {
        boolean ok;

        try {
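            // Detect whether JobContext exposes getCombinerKeyGroupingComparator (only newer Hadoop versions do).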
            JobContext.class.getDeclaredMethod("getCombinerKeyGroupingComparator");

            ok = true;
        } catch (NoSuchMethodException ignore) {
            ok = false;
        }

        COMBINE_KEY_GROUPING_SUPPORTED = ok;

        PARTIAL_COMPARATORS.put(BytesWritable.class.getName(), BytesWritablePartiallyRawComparator.class.getName());
        PARTIAL_COMPARATORS.put(Text.class.getName(), TextPartiallyRawComparator.class.getName());
    }

    /**
     * @param taskInfo Task info.
     * @param job Job.
     * @param jobId Job ID.
     * @param locNodeId Local node ID.
     * @param jobConfDataInput DataInput to read the JobConf from.
     * @throws IgniteCheckedException If the job configuration could not be read.
     */
    public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
            @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
        super(taskInfo, job);
        this.locNodeId = locNodeId;

        // Before creating the JobConf instance we have to set the proper context class loader.
        ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

        try {
            JobConf jobConf = new JobConf();

            try {
                jobConf.readFields(jobConfDataInput);
            } catch (IOException e) {
                throw new IgniteCheckedException(e);
            }

            // For map-reduce jobs prefer local writes.
            jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

            initializePartiallyRawComparator(jobConf);

            jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

            useNewMapper = jobConf.getUseNewMapper();
            useNewReducer = jobConf.getUseNewReducer();
            useNewCombiner = jobConf.getCombinerClass() == null;
        } finally {
            HadoopCommonUtils.restoreContextClassLoader(oldLdr);
        }
    }

    /** {@inheritDoc} */
    @Override
    public <T extends HadoopCounter> T counter(String grp, String name, Class<T> cls) {
        return cntrs.counter(grp, name, cls);
    }

    /** {@inheritDoc} */
    @Override
    public HadoopCounters counters() {
        return cntrs;
    }

    /**
     * Creates appropriate task from current task info.
     *
     * @return Task.
     */
    private HadoopTask createTask() {
        boolean isAbort = taskInfo().type() == HadoopTaskType.ABORT;

        switch (taskInfo().type()) {
        case SETUP:
            return useNewMapper ? new HadoopV2SetupTask(taskInfo()) : new HadoopV1SetupTask(taskInfo());

        case MAP:
            return useNewMapper ? new HadoopV2MapTask(taskInfo()) : new HadoopV1MapTask(taskInfo());

        case REDUCE:
            return useNewReducer ? new HadoopV2ReduceTask(taskInfo(), true)
                    : new HadoopV1ReduceTask(taskInfo(), true);

        case COMBINE:
            return useNewCombiner ? new HadoopV2ReduceTask(taskInfo(), false)
                    : new HadoopV1ReduceTask(taskInfo(), false);

        case COMMIT:
        case ABORT:
            return useNewReducer ? new HadoopV2CleanupTask(taskInfo(), isAbort)
                    : new HadoopV1CleanupTask(taskInfo(), isAbort);

        default:
            return null;
        }
    }

    /** {@inheritDoc} */
    @Override
    public void run() throws IgniteCheckedException {
        ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf().getClassLoader());

        try {
            try {
                task = createTask();
            } catch (Throwable e) {
                if (e instanceof Error)
                    throw e;

                throw transformException(e);
            }

            if (cancelled)
                throw new HadoopTaskCancelledException("Task cancelled.");

            try {
                task.run(this);
            } catch (Throwable e) {
                if (e instanceof Error)
                    throw e;

                throw transformException(e);
            }
        } finally {
            task = null;

            HadoopCommonUtils.restoreContextClassLoader(oldLdr);
        }
    }

    /** {@inheritDoc} */
    @Override
    public void cancel() {
        cancelled = true;

        HadoopTask t = task;

        if (t != null)
            t.cancel();
    }

    /** {@inheritDoc} */
    @Override
    public void prepareTaskEnvironment() throws IgniteCheckedException {
        File locDir;

        switch (taskInfo().type()) {
        case MAP:
        case REDUCE:
            job().prepareTaskEnvironment(taskInfo());

            locDir = taskLocalDir(job.igniteWorkDirectory(), locNodeId, taskInfo());

            break;

        default:
            locDir = jobLocalDir(job.igniteWorkDirectory(), locNodeId, taskInfo().jobId());
        }

        ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf().getClassLoader());

        try {
            FileSystem.get(jobConf());

            LocalFileSystem locFs = FileSystem.getLocal(jobConf());

            locFs.setWorkingDirectory(new Path(locDir.getAbsolutePath()));
        } catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;

            throw transformException(e);
        } finally {
            HadoopCommonUtils.restoreContextClassLoader(oldLdr);
        }
    }

    /** {@inheritDoc} */
    @Override
    public void cleanupTaskEnvironment() throws IgniteCheckedException {
        job().cleanupTaskEnvironment(taskInfo());
    }

    /**
     * Creates Hadoop attempt ID.
     *
     * @return Attempt ID.
     */
    public TaskAttemptID attemptId() {
        TaskID tid = new TaskID(jobCtx.getJobID(), taskType(taskInfo().type()), taskInfo().taskNumber());

        return new TaskAttemptID(tid, taskInfo().attempt());
    }

    /**
     * @param type Task type.
     * @return Hadoop task type.
     */
    private TaskType taskType(HadoopTaskType type) {
        switch (type) {
        case SETUP:
            return TaskType.JOB_SETUP;

        case MAP:
        case COMBINE:
            return TaskType.MAP;

        case REDUCE:
            return TaskType.REDUCE;

        case COMMIT:
        case ABORT:
            return TaskType.JOB_CLEANUP;

        default:
            return null;
        }
    }

    /**
     * Gets job configuration of the task.
     *
     * @return Job configuration.
     */
    public JobConf jobConf() {
        return jobCtx.getJobConf();
    }

    /**
     * Gets job context of the task.
     *
     * @return Job context.
     */
    public JobContextImpl jobContext() {
        return jobCtx;
    }

    /** {@inheritDoc} */
    @Override
    public HadoopPartitioner partitioner() throws IgniteCheckedException {
        Class<?> partClsOld = jobConf().getClass("mapred.partitioner.class", null);

        if (partClsOld != null)
            return new HadoopV1Partitioner(jobConf().getPartitionerClass(), jobConf());

        try {
            return new HadoopV2Partitioner(jobCtx.getPartitionerClass(), jobConf());
        } catch (ClassNotFoundException e) {
            throw new IgniteCheckedException(e);
        }
    }

    /**
     * Gets serializer for specified class.
     *
     * @param cls Class.
     * @param jobConf Job configuration.
     * @return Appropriate serializer.
     */
    @SuppressWarnings("unchecked")
    private HadoopSerialization getSerialization(Class<?> cls, Configuration jobConf)
            throws IgniteCheckedException {
        A.notNull(cls, "cls");

        SerializationFactory factory = new SerializationFactory(jobConf);

        Serialization<?> serialization = factory.getSerialization(cls);

        if (serialization == null)
            throw new IgniteCheckedException("Failed to find serialization for: " + cls.getName());

        if (serialization.getClass() == WritableSerialization.class)
            return new HadoopWritableSerialization((Class<? extends Writable>) cls);

        return new HadoopSerializationWrapper(serialization, cls);
    }

    /** {@inheritDoc} */
    @Override
    public HadoopSerialization keySerialization() throws IgniteCheckedException {
        return getSerialization(jobCtx.getMapOutputKeyClass(), jobConf());
    }

    /** {@inheritDoc} */
    @Override
    public HadoopSerialization valueSerialization() throws IgniteCheckedException {
        return getSerialization(jobCtx.getMapOutputValueClass(), jobConf());
    }

    /** {@inheritDoc} */
    @SuppressWarnings("unchecked")
    @Override
    public Comparator<Object> sortComparator() {
        return (Comparator<Object>) jobCtx.getSortComparator();
    }

    /** {@inheritDoc} */
    @SuppressWarnings("unchecked")
    @Override
    public PartiallyOffheapRawComparatorEx<Object> partialRawSortComparator() {
        Class cls = jobCtx.getJobConf().getClass(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
                null);

        if (cls == null)
            return null;

        Object res = ReflectionUtils.newInstance(cls, jobConf());

        if (res instanceof PartiallyOffheapRawComparatorEx)
            return (PartiallyOffheapRawComparatorEx) res;
        else
            return new HadoopV2DelegatingPartiallyOffheapRawComparator<>((PartiallyRawComparator) res);
    }

    /** {@inheritDoc} */
    @SuppressWarnings("unchecked")
    @Override
    public Comparator<Object> groupComparator() {
        Comparator<?> res;

        switch (taskInfo().type()) {
        case COMBINE:
            res = COMBINE_KEY_GROUPING_SUPPORTED ? jobContext().getCombinerKeyGroupingComparator()
                    : jobContext().getGroupingComparator();

            break;

        case REDUCE:
            res = jobContext().getGroupingComparator();

            break;

        default:
            return null;
        }

        if (res != null && res.getClass() != sortComparator().getClass())
            return (Comparator<Object>) res;

        return null;
    }

    /**
     * @param split Split.
     * @return Native Hadoop split.
     * @throws IgniteCheckedException if failed.
     */
    @SuppressWarnings("unchecked")
    public Object getNativeSplit(HadoopInputSplit split) throws IgniteCheckedException {
        if (split instanceof HadoopExternalSplit)
            return readExternalSplit((HadoopExternalSplit) split);

        if (split instanceof HadoopSplitWrapper)
            return unwrapSplit((HadoopSplitWrapper) split);

        throw new IllegalStateException("Unknown split: " + split);
    }

    /**
     * @param split External split.
     * @return Native input split.
     * @throws IgniteCheckedException If failed.
     */
    @SuppressWarnings("unchecked")
    private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
        Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

        FileSystem fs;

        try {
            fs = fileSystemForMrUserWithCaching(jobDir.toUri(), jobConf(), fsMap);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

            in.seek(split.offset());

            String clsName = Text.readString(in);

            Class<?> cls = jobConf().getClassByName(clsName);

            assert cls != null;

            Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

            Deserializer deserializer = serialization.getDeserializer(cls);

            deserializer.open(in);

            Object res = deserializer.deserialize(null);

            deserializer.close();

            assert res != null;

            return res;
        } catch (IOException | ClassNotFoundException e) {
            throw new IgniteCheckedException(e);
        }
    }

    /** {@inheritDoc} */
    @Override
    public <T> T runAsJobOwner(final Callable<T> c) throws IgniteCheckedException {
        String user = job.info().user();

        user = IgfsUtils.fixUserName(user);

        assert user != null;

        String ugiUser;

        try {
            UserGroupInformation currUser = UserGroupInformation.getCurrentUser();

            assert currUser != null;

            ugiUser = currUser.getShortUserName();
        } catch (IOException ioe) {
            throw new IgniteCheckedException(ioe);
        }

        try {
            if (F.eq(user, ugiUser))
                // If the current UGI context user is the same, make a direct call:
                return c.call();
            else {
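                // Otherwise run the callable on behalf of the job owner under a dedicated UGI.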
                UserGroupInformation ugi = UserGroupInformation.getBestUGI(null, user);

                return ugi.doAs(new PrivilegedExceptionAction<T>() {
                    @Override
                    public T run() throws Exception {
                        return c.call();
                    }
                });
            }
        } catch (Exception e) {
            throw new IgniteCheckedException(e);
        }
    }

    /**
     * Tries to initialize a partially raw comparator for the job if one is not set explicitly.
     *
     * @param conf Configuration.
     */
    private void initializePartiallyRawComparator(JobConf conf) {
        String clsName = conf.get(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), null);

        if (clsName == null) {
            Class keyCls = conf.getMapOutputKeyClass();

            while (keyCls != null) {
                clsName = PARTIAL_COMPARATORS.get(keyCls.getName());

                if (clsName != null) {
                    conf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), clsName);

                    break;
                }

                keyCls = keyCls.getSuperclass();
            }
        }
    }
}
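
For reference, the constructor above does not accept a JobConf directly: it reads one from a DataInput using Hadoop's Writable serialization (jobConf.readFields(jobConfDataInput)). The following minimal sketch, which assumes only stock Hadoop classes on the classpath (the class name JobConfRoundTripExample and the sample property are illustrative, not part of Ignite), shows the kind of byte stream that constructor consumes.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.mapred.JobConf;

/** Illustrative sketch: the Writable round-trip that the HadoopV2TaskContext constructor relies on. */
public class JobConfRoundTripExample {
    public static void main(String[] args) throws Exception {
        // Build a configuration the way a submitter might (the property below is just a sample).
        JobConf conf = new JobConf(false);
        conf.set("mapreduce.job.name", "example-job");

        // Serialize it with Writable semantics, as is done before shipping a job configuration to task nodes.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            conf.write(out);
        }

        // A DataInput positioned like this is what the HadoopV2TaskContext constructor reads from.
        DataInput in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));

        JobConf restored = new JobConf(false);
        restored.readFields(in);

        System.out.println("Restored job name: " + restored.get("mapreduce.job.name"));
    }
}

In Ignite itself the stream comes from the serialized job configuration, and, as the constructor shows, the context class loader is switched before the bytes are read.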