co.cask.cdap.internal.app.runtime.spark.SparkProgramRunner.java Source code

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.spark.SparkProgramRunner.java. SparkProgramRunner is the CDAP ProgramRunner implementation that launches Spark programs: it instantiates the user's Spark class, injects runtime fields (properties, datasets, metrics), wraps execution in a SparkRuntimeService, and records the run's lifecycle in the run-record Store.

Source

/*
 * Copyright © 2014-2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.spark;

import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.api.workflow.WorkflowToken;
import co.cask.cdap.app.ApplicationSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.store.Store;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.lang.InstantiatorFactory;
import co.cask.cdap.common.lang.PropertyFieldSetter;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.internal.app.runtime.DataSetFieldSetter;
import co.cask.cdap.internal.app.runtime.MetricsFieldSetter;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.workflow.BasicWorkflowToken;
import co.cask.cdap.internal.lang.Reflections;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.ProgramType;
import co.cask.tephra.TransactionContext;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.Service;
import com.google.gson.Gson;
import com.google.inject.Inject;
import org.apache.hadoop.conf.Configuration;
import org.apache.twill.api.RunId;
import org.apache.twill.common.Threads;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.internal.ServiceListenerAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

/**
 * Runs {@link Spark} programs
 */
public class SparkProgramRunner implements ProgramRunner {

    private static final Logger LOG = LoggerFactory.getLogger(SparkProgramRunner.class);
    private static final Gson GSON = new Gson();

    private final DatasetFramework datasetFramework;
    private final Configuration hConf;
    private final CConfiguration cConf;
    private final MetricsCollectionService metricsCollectionService;
    private final TransactionSystemClient txSystemClient;
    private final DiscoveryServiceClient discoveryServiceClient;
    private final StreamAdmin streamAdmin;
    private final Store store;

    @Inject
    public SparkProgramRunner(CConfiguration cConf, Configuration hConf, TransactionSystemClient txSystemClient,
            DatasetFramework datasetFramework, MetricsCollectionService metricsCollectionService,
            DiscoveryServiceClient discoveryServiceClient, StreamAdmin streamAdmin, Store store) {
        this.hConf = hConf;
        this.datasetFramework = datasetFramework;
        this.cConf = cConf;
        this.metricsCollectionService = metricsCollectionService;
        this.txSystemClient = txSystemClient;
        this.discoveryServiceClient = discoveryServiceClient;
        this.streamAdmin = streamAdmin;
        this.store = store;
    }

    @Override
    public ProgramController run(Program program, ProgramOptions options) {
        // Extract and verify parameters
        final ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification.");

        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type.");
        Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");

        final SparkSpecification spec = appSpec.getSpark().get(program.getName());
        Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());

        // Optionally get the runId. If this Spark program was started by another program
        // (e.g. a Workflow), it inherits that program's runId.
        Arguments arguments = options.getArguments();
        RunId runId = RunIds.fromString(arguments.getOption(ProgramOptionConstants.RUN_ID));

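        // Use the logical start time supplied by the caller (e.g. the scheduler or a Workflow)
        // if one was passed in; otherwise fall back to the current wall-clock time.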
        long logicalStartTime = arguments.hasOption(ProgramOptionConstants.LOGICAL_START_TIME)
                ? Long.parseLong(arguments.getOption(ProgramOptionConstants.LOGICAL_START_TIME))
                : System.currentTimeMillis();

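        // A WorkflowToken is only passed in when this Spark program runs as a node inside a Workflow.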
        WorkflowToken workflowToken = null;
        if (arguments.hasOption(ProgramOptionConstants.WORKFLOW_TOKEN)) {
            workflowToken = GSON.fromJson(arguments.getOption(ProgramOptionConstants.WORKFLOW_TOKEN),
                    BasicWorkflowToken.class);
        }

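        // Bundle everything the Spark program can touch at runtime (transactions, datasets,
        // service discovery, metrics, user arguments and the optional workflow token) into the
        // client-side context handed to the user code.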
        ClientSparkContext context = new ClientSparkContext(program, runId, logicalStartTime,
                options.getUserArguments().asMap(), new TransactionContext(txSystemClient), datasetFramework,
                discoveryServiceClient, metricsCollectionService, workflowToken);

        Spark spark;
        try {
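            // Instantiate the user's Spark program class through the program's classloader.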
            spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();

            // Fields injection
            Reflections.visit(spark, TypeToken.of(spark.getClass()), new PropertyFieldSetter(spec.getProperties()),
                    new DataSetFieldSetter(context), new MetricsFieldSetter(context.getMetrics()));
        } catch (Exception e) {
            LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
            throw Throwables.propagate(e);
        }

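        // Wrap the actual job submission and execution in a Guava Service so that the
        // controller below can start, stop and observe it.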
        Service sparkRuntimeService = new SparkRuntimeService(cConf, hConf, spark,
                new SparkContextFactory(hConf, context, datasetFramework, streamAdmin), program.getJarLocation(),
                txSystemClient);

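        // The listener keeps the run records in the Store in sync with the service lifecycle
        // (started, completed, killed or failed).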
        sparkRuntimeService.addListener(createRuntimeServiceListener(program.getId(), runId, arguments),
                Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new SparkProgramController(sparkRuntimeService, context);

        LOG.info("Starting Spark Job: {}", context);
        sparkRuntimeService.start();
        return controller;
    }

    /**
     * Creates a service listener to react to state changes of the {@link SparkRuntimeService}.
     */
    private Service.Listener createRuntimeServiceListener(final Id.Program programId, final RunId runId,
            Arguments arguments) {

        final String twillRunId = arguments.getOption(ProgramOptionConstants.TWILL_RUN_ID);
        final String workflowName = arguments.getOption(ProgramOptionConstants.WORKFLOW_NAME);
        final String workflowNodeId = arguments.getOption(ProgramOptionConstants.WORKFLOW_NODE_ID);
        final String workflowRunId = arguments.getOption(ProgramOptionConstants.WORKFLOW_RUN_ID);

        return new ServiceListenerAdapter() {
            @Override
            public void starting() {
                // Get the start time from the RunId
                long startTimeInSeconds = RunIds.getTime(runId, TimeUnit.SECONDS);
                if (startTimeInSeconds == -1) {
                    // If RunId is not time-based, use current time as start time
                    startTimeInSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
                }

                if (workflowName == null) {
                    store.setStart(programId, runId.getId(), startTimeInSeconds, null, twillRunId);
                } else {
                    // Program started by Workflow
                    store.setWorkflowProgramStart(programId, runId.getId(), workflowName, workflowRunId,
                            workflowNodeId, startTimeInSeconds, null, twillRunId);
                }
            }

            @Override
            public void terminated(Service.State from) {
                if (from == Service.State.STOPPING) {
                    // Service was killed
                    store.setStop(programId, runId.getId(),
                            TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                            ProgramController.State.KILLED.getRunStatus());
                } else {
                    // Service completed by itself.
                    store.setStop(programId, runId.getId(),
                            TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                            ProgramController.State.COMPLETED.getRunStatus());
                }
            }

            @Override
            public void failed(Service.State from, Throwable failure) {
                store.setStop(programId, runId.getId(), TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                        ProgramController.State.ERROR.getRunStatus());
            }
        };
    }
}
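
Note on the listener pattern

The run-record bookkeeping above relies on Guava's Service.Listener callbacks: terminated(Service.State.STOPPING) is only reached when a stop was requested before the service finished, so that run is recorded as KILLED, while any other terminal transition is recorded as COMPLETED, and failed(...) as ERROR. The standalone sketch below shows the same mapping with a plain AbstractExecutionThreadService; it is not part of CDAP, the class name ServiceListenerDemo and the printed messages are illustrative, and it assumes a recent Guava (18+) where startAsync()/awaitTerminated() and MoreExecutors.directExecutor() are available.

import com.google.common.util.concurrent.AbstractExecutionThreadService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Service;

public class ServiceListenerDemo {

    public static void main(String[] args) {
        // Stand-in for SparkRuntimeService: a service whose run() body is the "job".
        Service service = new AbstractExecutionThreadService() {
            @Override
            protected void run() throws Exception {
                Thread.sleep(100);
            }
        };

        // Same idea as createRuntimeServiceListener: map service state
        // transitions to run statuses (started / completed / killed / failed).
        service.addListener(new Service.Listener() {
            @Override
            public void starting() {
                System.out.println("run started");
            }

            @Override
            public void terminated(Service.State from) {
                // STOPPING means an external stop was requested, i.e. the run was killed;
                // any other previous state means the service finished on its own.
                System.out.println(from == Service.State.STOPPING ? "run killed" : "run completed");
            }

            @Override
            public void failed(Service.State from, Throwable failure) {
                System.out.println("run failed: " + failure);
            }
        }, MoreExecutors.directExecutor());

        service.startAsync().awaitTerminated();
    }
}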