co.cask.cdap.internal.app.runtime.batch.MapReduceProgramRunner.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.MapReduceProgramRunner.java

Source

/*
 * Copyright  2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.mapreduce.MapReduce;
import co.cask.cdap.api.mapreduce.MapReduceSpecification;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.store.Store;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.lang.InstantiatorFactory;
import co.cask.cdap.common.lang.PropertyFieldSetter;
import co.cask.cdap.common.logging.LoggingContextAccessor;
import co.cask.cdap.common.logging.common.LogWriter;
import co.cask.cdap.common.logging.logback.CAppender;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.metadata.writer.ProgramContextAware;
import co.cask.cdap.data2.registry.UsageRegistry;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.internal.app.runtime.AbstractProgramRunnerWithPlugin;
import co.cask.cdap.internal.app.runtime.DataSetFieldSetter;
import co.cask.cdap.internal.app.runtime.MetricsFieldSetter;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.ProgramRunners;
import co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator;
import co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework;
import co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo;
import co.cask.cdap.internal.lang.Reflections;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.proto.ProgramType;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.io.Closeables;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.Service;
import com.google.inject.Inject;
import com.google.inject.Injector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.twill.api.RunId;
import org.apache.twill.common.Threads;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.filesystem.LocationFactory;
import org.apache.twill.internal.ServiceListenerAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;

/**
 * Runs {@link MapReduce} programs.
 */
public class MapReduceProgramRunner extends AbstractProgramRunnerWithPlugin {
    private static final Logger LOG = LoggerFactory.getLogger(MapReduceProgramRunner.class);

    private final Injector injector;
    private final StreamAdmin streamAdmin;
    private final CConfiguration cConf;
    private final Configuration hConf;
    private final LocationFactory locationFactory;
    private final MetricsCollectionService metricsCollectionService;
    private final DatasetFramework datasetFramework;
    private final Store store;
    private final TransactionSystemClient txSystemClient;
    private final DiscoveryServiceClient discoveryServiceClient;
    private final UsageRegistry usageRegistry;

    @Inject
    public MapReduceProgramRunner(Injector injector, CConfiguration cConf, Configuration hConf,
            LocationFactory locationFactory, StreamAdmin streamAdmin, DatasetFramework datasetFramework,
            TransactionSystemClient txSystemClient, MetricsCollectionService metricsCollectionService,
            DiscoveryServiceClient discoveryServiceClient, Store store, UsageRegistry usageRegistry) {
        super(cConf);
        this.injector = injector;
        this.cConf = cConf;
        this.hConf = hConf;
        this.locationFactory = locationFactory;
        this.streamAdmin = streamAdmin;
        this.metricsCollectionService = metricsCollectionService;
        this.datasetFramework = datasetFramework;
        this.txSystemClient = txSystemClient;
        this.discoveryServiceClient = discoveryServiceClient;
        this.store = store;
        this.usageRegistry = usageRegistry;
    }

    @Inject(optional = true)
    void setLogWriter(@Nullable LogWriter logWriter) {
        if (logWriter != null) {
            CAppender.logWriter = logWriter;
        }
    }

    @Override
    public ProgramController run(final Program program, ProgramOptions options) {
        // Extract and verify parameters
        ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification.");

        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type.");
        Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE,
                "Only MAPREDUCE process type is supported.");

        MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
        Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());

        // Optionally get runId. If the map-reduce started by other program (e.g. Workflow), it inherit the runId.
        Arguments arguments = options.getArguments();

        final RunId runId = RunIds.fromString(arguments.getOption(ProgramOptionConstants.RUN_ID));

        WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
        DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework
                : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);

        // Setup dataset framework context, if required
        if (programDatasetFramework instanceof ProgramContextAware) {
            Id.Program programId = program.getId();
            ((ProgramContextAware) programDatasetFramework).initContext(new Id.Run(programId, runId.getId()));
        }

        MapReduce mapReduce;
        try {
            mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass()))
                    .create();
        } catch (Exception e) {
            LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
            throw Throwables.propagate(e);
        }

        // List of all Closeable resources that needs to be cleanup
        List<Closeable> closeables = new ArrayList<>();
        try {
            PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
            if (pluginInstantiator != null) {
                closeables.add(pluginInstantiator);
            }

            final BasicMapReduceContext context = new BasicMapReduceContext(program, runId,
                    options.getUserArguments(), spec, workflowInfo, discoveryServiceClient,
                    metricsCollectionService, txSystemClient, programDatasetFramework, streamAdmin,
                    getPluginArchive(options), pluginInstantiator);

            Reflections.visit(mapReduce, mapReduce.getClass(),
                    new PropertyFieldSetter(context.getSpecification().getProperties()),
                    new MetricsFieldSetter(context.getMetrics()), new DataSetFieldSetter(context));

            // note: this sets logging context on the thread level
            LoggingContextAccessor.setLoggingContext(context.getLoggingContext());

            final Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce,
                    spec, context, program.getJarLocation(), locationFactory, streamAdmin, txSystemClient,
                    usageRegistry);
            mapReduceRuntimeService.addListener(
                    createRuntimeServiceListener(program, runId, closeables, arguments, options.getUserArguments()),
                    Threads.SAME_THREAD_EXECUTOR);

            final ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);

            LOG.info("Starting MapReduce Job: {}", context.toString());
            // if security is not enabled, start the job as the user we're using to access hdfs with.
            // if this is not done, the mapred job will be launched as the user that runs the program
            // runner, which is probably the yarn user. This may cause permissions issues if the program
            // tries to access cdap data. For example, writing to a FileSet will fail, as the yarn user will
            // be running the job, but the data directory will be owned by cdap.
            if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
                mapReduceRuntimeService.start();
            } else {
                ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
            }
            return controller;
        } catch (Exception e) {
            closeAllQuietly(closeables);
            throw Throwables.propagate(e);
        }
    }

    /**
     * Creates a service listener to reactor on state changes on {@link MapReduceRuntimeService}.
     */
    private Service.Listener createRuntimeServiceListener(final Program program, final RunId runId,
            final Iterable<Closeable> closeables, final Arguments arguments, final Arguments userArgs) {

        final String twillRunId = arguments.getOption(ProgramOptionConstants.TWILL_RUN_ID);

        return new ServiceListenerAdapter() {
            @Override
            public void starting() {
                //Get start time from RunId
                long startTimeInSeconds = RunIds.getTime(runId, TimeUnit.SECONDS);
                if (startTimeInSeconds == -1) {
                    // If RunId is not time-based, use current time as start time
                    startTimeInSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
                }
                store.setStart(program.getId(), runId.getId(), startTimeInSeconds, twillRunId, userArgs.asMap(),
                        arguments.asMap());
            }

            @Override
            public void terminated(Service.State from) {
                closeAllQuietly(closeables);
                ProgramRunStatus runStatus = ProgramController.State.COMPLETED.getRunStatus();
                if (from == Service.State.STOPPING) {
                    // Service was killed
                    runStatus = ProgramController.State.KILLED.getRunStatus();
                }

                store.setStop(program.getId(), runId.getId(),
                        TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()), runStatus);
            }

            @Override
            public void failed(Service.State from, @Nullable Throwable failure) {
                closeAllQuietly(closeables);
                store.setStop(program.getId(), runId.getId(),
                        TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                        ProgramController.State.ERROR.getRunStatus(), failure);
            }
        };
    }

    private void closeAllQuietly(Iterable<Closeable> closeables) {
        for (Closeable c : closeables) {
            Closeables.closeQuietly(c);
        }
    }

    @Nullable
    private File getPluginArchive(ProgramOptions options) {
        if (!options.getArguments().hasOption(ProgramOptionConstants.PLUGIN_ARCHIVE)) {
            return null;
        }
        return new File(options.getArguments().getOption(ProgramOptionConstants.PLUGIN_ARCHIVE));
    }
}