co.cask.cdap.app.runtime.spark.distributed.DistributedSparkProgramRunner.java Source code

Introduction

Here is the source code for co.cask.cdap.app.runtime.spark.distributed.DistributedSparkProgramRunner.java, a ProgramRunner implementation that launches a Spark program in distributed mode on YARN.

Source

/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.runtime.spark.distributed;

import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.app.runtime.spark.SparkRuntimeContextConfig;
import co.cask.cdap.app.runtime.spark.SparkRuntimeUtils;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.lang.ProgramClassLoader;
import co.cask.cdap.common.lang.ProgramClassLoaderProvider;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.distributed.AbstractDistributedProgramRunner;
import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;
import co.cask.cdap.internal.app.runtime.spark.SparkUtils;
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.security.TokenSecureStoreUpdater;
import com.google.common.base.Preconditions;
import com.google.inject.Inject;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.RunId;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Map;

/**
 * A {@link ProgramRunner} for launching a {@link Spark} program in distributed mode. It starts
 * a YARN application to act as the Spark client. A second YARN application will be launched
 * by the Spark framework as the actual Spark program execution.
 */
public final class DistributedSparkProgramRunner extends AbstractDistributedProgramRunner
        implements ProgramClassLoaderProvider {

    private static final Logger LOG = LoggerFactory.getLogger(DistributedSparkProgramRunner.class);

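    // Instantiated by Guice; wraps the given YARN configuration so that
    // cluster mode is flagged before it is handed to the parent runner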
    @Inject
    DistributedSparkProgramRunner(TwillRunner twillRunner, YarnConfiguration hConf, CConfiguration cConf,
            TokenSecureStoreUpdater tokenSecureStoreUpdater) {
        super(twillRunner, createConfiguration(hConf), cConf, tokenSecureStoreUpdater);
    }

    @Override
    protected ProgramController launch(Program program, ProgramOptions options,
            Map<String, LocalizeResource> localizeResources, File tempDir,
            AbstractDistributedProgramRunner.ApplicationLauncher launcher) {
        // Extract and verify parameters
        ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification for %s", program.getId());

        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type for %s", program.getId());
        Preconditions.checkArgument(processorType == ProgramType.SPARK,
                "Only SPARK process type is supported. Program type is %s for %s", processorType, program.getId());

        SparkSpecification spec = appSpec.getSpark().get(program.getName());
        Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getId());

        // Localize the spark-assembly jar and spark conf zip
        String sparkAssemblyJarName = SparkUtils.prepareSparkResources(tempDir, localizeResources);

        LOG.info("Launching Spark program: {}", program.getId());
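        // Launch a YARN application through Twill to act as the Spark client;
        // the Spark framework then submits a second YARN application that runs
        // the actual Spark program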
        TwillController controller = launcher.launch(
                new SparkTwillApplication(program, spec, localizeResources, eventHandler), sparkAssemblyJarName);

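        // The run id comes in through the program options; reuse it so the
        // returned controller reports against the same run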
        RunId runId = RunIds.fromString(options.getArguments().getOption(ProgramOptionConstants.RUN_ID));
        return new SparkTwillProgramController(program.getId().toEntityId(), controller, runId).startListen();
    }

    private static YarnConfiguration createConfiguration(YarnConfiguration hConf) {
        YarnConfiguration configuration = new YarnConfiguration(hConf);
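        // Flag cluster (distributed) mode so the Spark runtime context treats
        // this run as a distributed execution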
        configuration.setBoolean(SparkRuntimeContextConfig.HCONF_ATTR_CLUSTER_MODE, true);
        return configuration;
    }

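    // Builds the class loader for the user's program classes, delegating to
    // SparkRuntimeUtils with this runner's own class loader as the parent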
    @Override
    public ProgramClassLoader createProgramClassLoader(CConfiguration cConf, File dir) {
        return SparkRuntimeUtils.createProgramClassLoader(cConf, dir, getClass().getClassLoader());
    }
}
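
Usage

The snippet below is a minimal sketch of how a runner like this might be obtained and invoked. It assumes a Guice Injector already configured with the CDAP distributed runtime modules, and the program and options values are placeholders that would normally be supplied by the CDAP program lifecycle service; none of this setup appears in the file above.

// Hypothetical wiring; module setup is omitted and assumed to be in place.
Injector injector = Guice.createInjector(/* CDAP runtime modules */);
ProgramRunner runner = injector.getInstance(DistributedSparkProgramRunner.class);

// program (a Program) and options (a ProgramOptions) are placeholders here.
// run(...) ultimately invokes launch(...) above and returns a controller
// that can be used to observe and stop the launched Spark program.
ProgramController controller = runner.run(program, options);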