co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch.distributed;

import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

/**
 * A helper class for dealing with MapReduce framework localization and classpath settings based on different
 * hadoop configurations.
 *
 * <p>All methods are static; the class cannot be instantiated.
 */
public final class MapReduceContainerHelper {

    private static final Logger LOG = LoggerFactory.getLogger(MapReduceContainerHelper.class);

    /**
     * Splits a classpath configuration value on both "," and the platform path separator, because one can set
     * the conf with something like "path1,path2:path3" and it should become "path1:path2:path3" in the target
     * JVM process. Entries are trimmed and empty entries are dropped. Built once since a {@link Splitter} is
     * immutable and can be shared.
     */
    private static final Splitter CLASSPATH_SPLITTER = Splitter
            .on(Pattern.compile(",|" + File.pathSeparatorChar)).trimResults().omitEmptyStrings();

    /**
     * Returns a list of path to be used for the MapReduce framework classpath.
     *
     * <p>If no MR application framework is configured, the entries of {@code yarn.application.classpath}
     * (or its default) are appended first. The entries of {@code mapreduce.application.classpath}
     * (or its default) are always appended.
     *
     * @param hConf the configuration for the job.
     * @param result a list for appending MR framework classpath
     * @return the same {@code result} list from the argument
     */
    public static List<String> getMapReduceClassPath(Configuration hConf, List<String> result) {
        // If MR framework is not specified, use yarn.application.classpath and mapreduce.application.classpath.
        // Otherwise, only use the mapreduce.application.classpath.
        if (getFrameworkPath(hConf) == null) {
            String yarnClassPath = hConf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                    Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
            Iterables.addAll(result, CLASSPATH_SPLITTER.split(yarnClassPath));
        }

        // Add MR application classpath
        String mrClassPath = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
                MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH);
        Iterables.addAll(result, CLASSPATH_SPLITTER.split(mrClassPath));
        return result;
    }

    /**
     * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
     *
     * <p>The returned URI carries the fully resolved framework location and, as its fragment, the name the
     * framework should be localized as: the fragment from the configured value if it has one, otherwise the
     * last segment of the resolved path.
     *
     * @param hConf the job configuration
     * @return the framework URI or {@code null} if not present (missing or empty setting) or if the URI in the
     *         config is invalid or cannot be resolved.
     */
    @Nullable
    public static URI getFrameworkURI(Configuration hConf) {
        String framework = getFrameworkPath(hConf);
        if (framework == null) {
            return null;
        }

        try {
            // Parse the path. It can contain '#' to represent the localized file name
            URI uri = new URI(framework);
            String linkName = uri.getFragment();

            // The following resolution logic is copied from JobSubmitter in MR.
            FileSystem fs = FileSystem.get(hConf);
            Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
            FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
            frameworkPath = fc.resolvePath(frameworkPath);
            uri = frameworkPath.toUri();

            // If there is no localized name (in the URI fragment), use the last part of the URI path as name
            if (linkName == null) {
                linkName = uri.getPath();
                int idx = linkName.lastIndexOf('/');
                if (idx >= 0) {
                    linkName = linkName.substring(idx + 1);
                }
            }
            return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
        } catch (URISyntaxException e) {
            LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                    framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
        } catch (IOException e) {
            LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                    framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
        }
        return null;
    }

    /**
     * Sets the resources that need to be localized to program runner twill container that used as MapReduce client.
     *
     * <p>When a framework URI is available, it is registered in {@code localizeResources} under the URI's
     * fragment (the localized name), pointing at the same URI without the fragment.
     *
     * @param hConf The hadoop configuration
     * @param localizeResources the map to be updated with the localized resources.
     * @return an immutable list of extra classpaths need to be set for the program runner container.
     */
    public static List<String> localizeFramework(Configuration hConf,
            Map<String, LocalizeResource> localizeResources) {
        try {
            URI frameworkURI = getFrameworkURI(hConf);

            // If MR Application framework is used, need to localize the framework file to Twill container
            if (frameworkURI != null) {
                // Strip the fragment; it is only the localized name and is used as the map key instead
                URI uri = new URI(frameworkURI.getScheme(), frameworkURI.getAuthority(), frameworkURI.getPath(),
                        null, null);
                localizeResources.put(frameworkURI.getFragment(), new LocalizeResource(uri, true));
            }
            return ImmutableList.copyOf(getMapReduceClassPath(hConf, new ArrayList<String>()));
        } catch (URISyntaxException e) {
            // Shouldn't happen since the frameworkURI is already parsed.
            throw Throwables.propagate(e);
        }
    }

    /**
     * Reads the {@code mapreduce.application.framework.path} setting, treating an empty value the same as an
     * absent one. Hadoop's MapReduce client reads this setting with an empty-string default and only acts on
     * non-empty values, so an empty value must not be interpreted as "a framework is configured".
     *
     * @param hConf the job configuration
     * @return the configured value as-is, or {@code null} if it is missing or empty
     */
    @Nullable
    private static String getFrameworkPath(Configuration hConf) {
        String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
        return (framework == null || framework.isEmpty()) ? null : framework;
    }

    private MapReduceContainerHelper() {
        // no-op
    }
}