org.finra.dm.dao.impl.EmrDaoImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.finra.dm.dao.impl.EmrDaoImpl.java

Source

/*
* Copyright 2015 herd contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.finra.dm.dao.impl;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Pattern;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.Application;
import com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.Cluster;
import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
import com.amazonaws.services.elasticmapreduce.model.Configuration;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
import com.amazonaws.services.elasticmapreduce.model.DescribeStepRequest;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.Instance;
import com.amazonaws.services.elasticmapreduce.model.InstanceGroupConfig;
import com.amazonaws.services.elasticmapreduce.model.InstanceGroupType;
import com.amazonaws.services.elasticmapreduce.model.InstanceRoleType;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.ListClustersRequest;
import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
import com.amazonaws.services.elasticmapreduce.model.ListInstancesRequest;
import com.amazonaws.services.elasticmapreduce.model.ListStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.MarketType;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.ScriptBootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.Step;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepState;
import com.amazonaws.services.elasticmapreduce.model.StepSummary;
import com.amazonaws.services.elasticmapreduce.model.Tag;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;

import org.finra.dm.core.helper.ConfigurationHelper;
import org.finra.dm.dao.Ec2Dao;
import org.finra.dm.dao.EmrDao;
import org.finra.dm.dao.EmrOperations;
import org.finra.dm.dao.helper.DmStringHelper;
import org.finra.dm.dao.helper.EmrHelper;
import org.finra.dm.model.api.xml.ConfigurationFile;
import org.finra.dm.model.api.xml.ConfigurationFiles;
import org.finra.dm.model.api.xml.EmrClusterDefinition;
import org.finra.dm.model.api.xml.EmrClusterDefinitionApplication;
import org.finra.dm.model.api.xml.EmrClusterDefinitionConfiguration;
import org.finra.dm.model.api.xml.HadoopJarStep;
import org.finra.dm.model.api.xml.KeyValuePairConfiguration;
import org.finra.dm.model.api.xml.KeyValuePairConfigurations;
import org.finra.dm.model.api.xml.NodeTag;
import org.finra.dm.model.api.xml.Parameter;
import org.finra.dm.model.api.xml.ScriptDefinition;
import org.finra.dm.model.dto.AwsParamsDto;
import org.finra.dm.model.dto.ConfigurationValue;

/**
 * The EMR DAO implementation.
 */
@Repository
public class EmrDaoImpl implements EmrDao {
    // Environment for accessing DB properties. Used throughout this class to look up
    // ConfigurationValue entries (S3 locations, EMR script names, valid cluster states, etc.).
    @Autowired
    private ConfigurationHelper configurationHelper;

    // Wrapper through which every EMR service call in this DAO is issued
    // (list/describe clusters, run job flow, add steps, terminate cluster).
    @Autowired
    private EmrOperations emrOperations;

    // EC2 DAO, used to attach security groups to the EC2 instances of a cluster.
    @Autowired
    private Ec2Dao ec2Dao;

    // NOTE(review): not referenced in the portion of the class visible here; presumably used further below.
    @Autowired
    private DmStringHelper dmStringHelper;

    // NOTE(review): not referenced in the portion of the class visible here; presumably used further below.
    @Autowired
    private EmrHelper emrHelper;

    /**
     * Adds a single step to the active EMR cluster that has the given name.
     * <p>
     * Five kinds of steps are currently supported: 1: ShellStep - for shell scripts, 2: OozieStep - for Oozie workflow xml files, 3: HiveStep - for hive
     * scripts, 4: HadoopJarStep - for custom Map Reduce jar files, and 5: PigStep - for Pig scripts.
     *
     * @param clusterName the name of the EMR cluster the step is added to.
     * @param emrStepConfig the EMR step config to be added.
     * @param awsParamsDto the proxy details.
     *
     * @return the id of the step that was added.
     * @throws IllegalArgumentException if no active cluster with the given name exists.
     */
    @Override
    public String addEmrStep(String clusterName, StepConfig emrStepConfig, AwsParamsDto awsParamsDto)
            throws Exception {
        // Resolve the name to the id of an active cluster first; this fails fast when there is none.
        String activeClusterId = getValidEmrClusterIdByName(clusterName, awsParamsDto);

        // The request takes a list of steps, so wrap the single step that was passed in.
        List<StepConfig> stepConfigs = new ArrayList<>();
        stepConfigs.add(emrStepConfig);

        // Submit the step and hand back the id assigned to it.
        AddJobFlowStepsRequest addStepsRequest = new AddJobFlowStepsRequest(activeClusterId, stepConfigs);
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParamsDto);
        List<String> addedStepIds = emrOperations.addJobFlowStepsRequest(emrClient, addStepsRequest);

        return addedStepIds.get(0);
    }

    /**
     * Adds the given security groups to every master instance of the active EMR cluster that has the given name.
     *
     * @param clusterName the name of the EMR cluster.
     * @param securityGroups the list of security groups to add.
     * @param awsParams the proxy details.
     *
     * @return the same list of security groups that was passed in.
     * @throws IllegalArgumentException if no active cluster with the given name exists or the cluster has no master instances yet.
     */
    @Override
    public List<String> addEmrMasterSecurityGroups(String clusterName, List<String> securityGroups,
            AwsParamsDto awsParams) throws Exception {
        // Resolve the cluster name to a cluster id.
        String activeClusterId = getValidEmrClusterIdByName(clusterName, awsParams);

        // Ask EMR only for the instances that belong to the MASTER instance group.
        ListInstancesRequest masterInstancesRequest = new ListInstancesRequest().withClusterId(activeClusterId)
                .withInstanceGroupTypes(InstanceGroupType.MASTER);
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParams);
        List<Instance> masterInstances = emrOperations
                .listClusterInstancesRequest(emrClient, masterInstancesRequest).getInstances();

        // The master node may not have been provisioned yet.
        if (masterInstances.isEmpty()) {
            throw new IllegalArgumentException(
                    "No master instances found for the cluster \"" + clusterName + "\".");
        }

        // Attach the groups to each master EC2 instance.
        for (Instance masterInstance : masterInstances) {
            ec2Dao.addSecurityGroupsToEc2Instance(masterInstance.getEc2InstanceId(), securityGroups, awsParams);
        }

        return securityGroups;
    }

    /**
     * Looks up the master instance of the EMR cluster that has the given id.
     *
     * @param clusterId the EMR cluster id.
     * @param awsParams the proxy details.
     *
     * @return the first instance listed for the cluster's MASTER instance group.
     * @throws IllegalArgumentException if no master instances are listed for the cluster.
     */
    @Override
    public Instance getEmrMasterInstance(String clusterId, AwsParamsDto awsParams) throws Exception {
        // Restrict the instance listing to the MASTER instance group.
        ListInstancesRequest masterInstancesRequest = new ListInstancesRequest().withClusterId(clusterId)
                .withInstanceGroupTypes(InstanceGroupType.MASTER);
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParams);
        List<Instance> masterInstances = emrOperations
                .listClusterInstancesRequest(emrClient, masterInstancesRequest).getInstances();

        // The master node may not have been provisioned yet.
        if (masterInstances.isEmpty()) {
            throw new IllegalArgumentException("No master instances found for the cluster \"" + clusterId + "\".");
        }

        // EMR has only one master node, so the first entry is the one we want.
        return masterInstances.get(0);
    }

    /**
     * Creates an EMR cluster by running a job flow built from the given cluster definition.
     *
     * @param clusterName the cluster name value.
     * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the cluster Id.
     */
    @Override
    public String createEmrCluster(String clusterName, EmrClusterDefinition emrClusterDefinition,
            AwsParamsDto awsParams) {
        // Build the client first, then the run-job-flow request, and submit it.
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParams);
        return emrOperations.runEmrJobFlow(emrClient, getRunJobFlowRequest(clusterName, emrClusterDefinition));
    }

    /**
     * Terminates the active EMR cluster that has the given name.
     *
     * @param clusterName the cluster name.
     * @param overrideTerminationProtection flag passed through to the terminate operation together with the cluster id.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the Id of the cluster for which termination was requested.
     * @throws IllegalArgumentException if no active cluster with the given name exists.
     */
    @Override
    public String terminateEmrCluster(String clusterName, boolean overrideTerminationProtection,
            AwsParamsDto awsParams) {
        // Resolve the name to an active cluster id; fails when there is no such cluster.
        String activeClusterId = getValidEmrClusterIdByName(clusterName, awsParams);

        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParams);
        emrOperations.terminateEmrCluster(emrClient, activeClusterId, overrideTerminationProtection);

        return activeClusterId;
    }

    /**
     * Gets an EMR cluster by its cluster Id.
     *
     * @param clusterId the job Id returned by EMR for the cluster.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the cluster, or null if the cluster Id is blank or the describe call yields no cluster.
     */
    @Override
    public Cluster getEmrClusterById(String clusterId, AwsParamsDto awsParams) {
        // Nothing to look up without an id.
        if (StringUtils.isBlank(clusterId)) {
            return null;
        }

        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParams);
        DescribeClusterRequest describeRequest = new DescribeClusterRequest().withClusterId(clusterId);
        DescribeClusterResult describeResult = emrOperations.describeClusterRequest(emrClient, describeRequest);

        // A missing result and a result without a cluster are both reported as null.
        return (describeResult == null) ? null : describeResult.getCluster();
    }

    /**
     * Gets the state of an EMR cluster by its cluster Id.
     *
     * @param clusterId the job Id returned by EMR for the cluster.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the state taken from the cluster's status, or null if no cluster was found.
     */
    @Override
    public String getEmrClusterStatusById(String clusterId, AwsParamsDto awsParams) {
        Cluster foundCluster = getEmrClusterById(clusterId, awsParams);
        if (foundCluster == null) {
            return null;
        }

        return foundCluster.getStatus().getState();
    }

    /**
     * Gets the Id of the active EMR cluster that has the given name, failing if there is none.
     *
     * @param clusterName the cluster name value.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the cluster Id from EMR; never blank.
     * @throws IllegalArgumentException if no active cluster is found.
     */
    private String getValidEmrClusterIdByName(String clusterName, AwsParamsDto awsParams) {
        String activeClusterId = getActiveEmrClusterIdByName(clusterName, awsParams);

        // Return right away when the lookup produced an id.
        if (!StringUtils.isBlank(activeClusterId)) {
            return activeClusterId;
        }

        throw new IllegalArgumentException("The cluster \"" + clusterName + "\" does not exist.");
    }

    /**
     * Gets the Id of an active EMR cluster by the cluster name. Only clusters in one of the configured active states are considered (see {@link
     * #getActiveEmrClusterByName(String, AwsParamsDto)}).
     *
     * @param clusterName the cluster name value.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the cluster Id, or null if no matching active cluster is found.
     */
    @Override
    public String getActiveEmrClusterIdByName(String clusterName, AwsParamsDto awsParams) {
        ClusterSummary activeCluster = getActiveEmrClusterByName(clusterName, awsParams);
        if (activeCluster == null) {
            return null;
        }

        return activeCluster.getId();
    }

    /**
     * Gets an active EMR cluster by the cluster name. Only clusters whose state is one of the states returned by getActiveEmrClusterStates() are
     * listed. The name comparison ignores case, and the first match wins.
     *
     * @param clusterName the cluster name value.
     * @param awsParams AWS related parameters for access/secret keys and proxy details.
     *
     * @return the matching ClusterSummary object, or null if the name is blank or no listed cluster matches.
     */
    @Override
    public ClusterSummary getActiveEmrClusterByName(String clusterName, AwsParamsDto awsParams) {
        // A blank name can never match a cluster.
        if (StringUtils.isBlank(clusterName)) {
            return null;
        }

        // Only clusters in one of the active states are of interest.
        ListClustersRequest activeClustersRequest = new ListClustersRequest()
                .withClusterStates(getActiveEmrClusterStates());

        // The list API returns results a page at a time; each page carries a marker to be passed
        // with the next call until no marker is returned.
        String pageMarker = activeClustersRequest.getMarker();
        while (true) {
            ListClustersResult page = emrOperations.listEmrClusters(getEmrClient(awsParams),
                    activeClustersRequest.withMarker(pageMarker));

            // Scan this page for a cluster with the requested name.
            for (ClusterSummary candidate : page.getClusters()) {
                String candidateName = candidate.getName();
                if (StringUtils.isNotBlank(candidateName) && candidateName.equalsIgnoreCase(clusterName)) {
                    return candidate;
                }
            }

            // No match on this page: continue with the next one, or give up when there is none.
            pageMarker = page.getMarker();
            if (pageMarker == null) {
                return null;
            }
        }
    }

    /**
     * Gets the active step on the cluster, if any. A step counts as active when it is in the RUNNING state.
     *
     * @param clusterId the cluster id.
     * @param awsParamsDto AWS related parameters for access/secret keys and proxy details.
     *
     * @return the first running step summary listed for the cluster, or null if there is none.
     */
    @Override
    public StepSummary getClusterActiveStep(String clusterId, AwsParamsDto awsParamsDto) {
        // List only the steps that are currently running.
        ListStepsRequest runningStepsRequest = new ListStepsRequest().withClusterId(clusterId)
                .withStepStates(StepState.RUNNING);
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParamsDto);
        List<StepSummary> runningSteps = emrOperations.listStepsRequest(emrClient, runningStepsRequest).getSteps();

        // A null list and an empty list both mean that nothing is running.
        if (CollectionUtils.isEmpty(runningSteps)) {
            return null;
        }

        return runningSteps.get(0);
    }

    /**
     * Gets a step on the cluster.
     *
     * @param clusterId the cluster id.
     * @param stepId the step id to get details of.
     * @param awsParamsDto AWS related parameters for access/secret keys and proxy details.
     *
     * @return the step object taken from the describe-step result.
     */
    @Override
    public Step getClusterStep(String clusterId, String stepId, AwsParamsDto awsParamsDto) {
        DescribeStepRequest stepRequest = new DescribeStepRequest().withClusterId(clusterId).withStepId(stepId);
        AmazonElasticMapReduceClient emrClient = getEmrClient(awsParamsDto);

        return emrOperations.describeStepRequest(emrClient, stepRequest).getStep();
    }

    /**
     * Creates an EMR client with the given proxy details. A new client is built on every call.
     * <p>
     * The client is configured with an HTTP proxy only when both a non-blank proxy host and a proxy port are specified. Otherwise, a client with the
     * default configuration is returned.
     *
     * @param awsParamsDto AWS related parameters for access/secret keys and proxy details.
     *
     * @return the AmazonElasticMapReduceClient object.
     */
    @Override
    public AmazonElasticMapReduceClient getEmrClient(AwsParamsDto awsParamsDto) {
        // TODO Building EMR client every time requested, if this becomes a performance issue,
        // might need to consider storing a singleton or building the client once per request.

        // Create an EMR client with HTTP proxy information. The port is checked for null directly:
        // calling toString() on it would throw a NullPointerException when only the host is set.
        if (StringUtils.isNotBlank(awsParamsDto.getHttpProxyHost()) && awsParamsDto.getHttpProxyPort() != null) {
            return new AmazonElasticMapReduceClient(new ClientConfiguration()
                    .withProxyHost(awsParamsDto.getHttpProxyHost()).withProxyPort(awsParamsDto.getHttpProxyPort()));
        }

        // Create an EMR client with no proxy information.
        return new AmazonElasticMapReduceClient();
    }

    /**
     * Gets the cluster states that are considered active. The states are read from the EMR_VALID_STATES configuration value and split on the
     * FIELD_DATA_DELIMITER configuration value.
     *
     * @return the array of active cluster state names.
     */
    private String[] getActiveEmrClusterStates() {
        String emrStatesString = configurationHelper.getProperty(ConfigurationValue.EMR_VALID_STATES);
        String delimiter = configurationHelper.getProperty(ConfigurationValue.FIELD_DATA_DELIMITER);

        // String.split() takes a regular expression. Quote the delimiter so that it is always matched literally:
        // merely prefixing a backslash only works for a single metacharacter such as "|" and turns a letter
        // delimiter into a character class or control-character escape.
        return emrStatesString.split(Pattern.quote(delimiter));
    }

    /**
     * Builds the full S3 staging location by concatenating the configured URL protocol, staging bucket name, path delimiter, and staging resource
     * base.
     *
     * @return the s3 managed location.
     */
    private String getS3StagingLocation() {
        String urlProtocol = configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL);
        String stagingBucketName = configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME);
        String pathDelimiter = configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER);
        String stagingResourceBase = configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE);

        return urlProtocol + stagingBucketName + pathDelimiter + stagingResourceBase;
    }

    /**
     * Creates the configuration for a single instance group.
     *
     * @param roleType role type for the instance group (MASTER/CORE/TASK).
     * @param instanceType EC2 instance type for the instance group.
     * @param instanceCount number of instances for the instance group.
     * @param bidPrice bid price in case of SPOT instance request; when null, no market or bid price is set on the group.
     *
     * @return the instance group config object.
     */
    private InstanceGroupConfig getInstanceGroupConfig(InstanceRoleType roleType, String instanceType,
            Integer instanceCount, BigDecimal bidPrice) {
        InstanceGroupConfig groupConfig = new InstanceGroupConfig(roleType, instanceType, instanceCount);

        // Without a bid price there is nothing more to configure.
        if (bidPrice == null) {
            return groupConfig;
        }

        // A bid price means the instances are requested on the spot market.
        groupConfig.setMarket(MarketType.SPOT);
        groupConfig.setBidPrice(bidPrice.toString());
        return groupConfig;
    }

    /**
     * Creates the instance group configurations for the MASTER, CORE, and (optional) TASK nodes of the given cluster definition.
     *
     * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters.
     *
     * @return the instance group config list, in MASTER, CORE, TASK order; the TASK entry is present only when task instances are defined.
     */
    private ArrayList<InstanceGroupConfig> getInstanceGroupConfig(EmrClusterDefinition emrClusterDefinition) {
        // The MASTER node group.
        InstanceGroupConfig masterGroup = getInstanceGroupConfig(InstanceRoleType.MASTER,
                emrClusterDefinition.getInstanceDefinitions().getMasterInstances().getInstanceType(),
                emrClusterDefinition.getInstanceDefinitions().getMasterInstances().getInstanceCount(),
                emrClusterDefinition.getInstanceDefinitions().getMasterInstances().getInstanceSpotPrice());

        // The CORE node group.
        InstanceGroupConfig coreGroup = getInstanceGroupConfig(InstanceRoleType.CORE,
                emrClusterDefinition.getInstanceDefinitions().getCoreInstances().getInstanceType(),
                emrClusterDefinition.getInstanceDefinitions().getCoreInstances().getInstanceCount(),
                emrClusterDefinition.getInstanceDefinitions().getCoreInstances().getInstanceSpotPrice());

        ArrayList<InstanceGroupConfig> instanceGroups = new ArrayList<>();
        instanceGroups.add(masterGroup);
        instanceGroups.add(coreGroup);

        // The TASK node group is optional in the cluster definition.
        if (emrClusterDefinition.getInstanceDefinitions().getTaskInstances() != null) {
            InstanceGroupConfig taskGroup = getInstanceGroupConfig(InstanceRoleType.TASK,
                    emrClusterDefinition.getInstanceDefinitions().getTaskInstances().getInstanceType(),
                    emrClusterDefinition.getInstanceDefinitions().getTaskInstances().getInstanceCount(),
                    emrClusterDefinition.getInstanceDefinitions().getTaskInstances().getInstanceSpotPrice());
            instanceGroups.add(taskGroup);
        }

        return instanceGroups;
    }

    /**
     * Creates the job flow instances configuration from the given cluster definition. The instance groups are always set; every other setting is
     * applied only when a value for it is available.
     *
     * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters.
     *
     * @return the job flow instance configuration.
     */
    private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) {
        JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig();

        // The configured DM EMR support security group, if any, becomes an additional group on the master node.
        String supportSecurityGroup = configurationHelper
                .getProperty(ConfigurationValue.EMR_DM_SUPPORT_SECURITY_GROUP);
        if (StringUtils.isNotBlank(supportSecurityGroup)) {
            List<String> masterSecurityGroups = new ArrayList<>();
            masterSecurityGroups.add(supportSecurityGroup);
            instancesConfig.setAdditionalMasterSecurityGroups(masterSecurityGroups);
        }

        // SSH key pair name.
        String sshKeyPairName = emrClusterDefinition.getSshKeyPairName();
        if (StringUtils.isNotBlank(sshKeyPairName)) {
            instancesConfig.setEc2KeyName(sshKeyPairName);
        }

        // Subnet id.
        String subnetId = emrClusterDefinition.getSubnetId();
        if (StringUtils.isNotBlank(subnetId)) {
            instancesConfig.setEc2SubnetId(subnetId);
        }

        // Instance groups (MASTER/CORE and optional TASK).
        instancesConfig.setInstanceGroups(getInstanceGroupConfig(emrClusterDefinition));

        // Optional flag: keep the cluster alive when there are no steps.
        Boolean keepAlive = emrClusterDefinition.isKeepAlive();
        if (keepAlive != null) {
            instancesConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
        }

        // Optional flag: termination protection.
        Boolean terminationProtection = emrClusterDefinition.isTerminationProtection();
        if (terminationProtection != null) {
            instancesConfig.setTerminationProtected(terminationProtection);
        }

        // Optional hadoop version.
        String hadoopVersion = emrClusterDefinition.getHadoopVersion();
        if (StringUtils.isNotBlank(hadoopVersion)) {
            instancesConfig.setHadoopVersion(hadoopVersion);
        }

        return instancesConfig;
    }

    /**
     * Creates a BootstrapActionConfig object for a bootstrap script. No script arguments are set here; callers add them afterwards as needed.
     *
     * @param scriptDescription bootstrap script name to be displayed.
     * @param bootstrapScript location of the bootstrap script.
     *
     * @return the bootstrap action configuration with the given name and script path.
     */
    private BootstrapActionConfig getBootstrapActionConfig(String scriptDescription, String bootstrapScript) {
        // The script part carries the location of the script to run.
        ScriptBootstrapActionConfig scriptAction = new ScriptBootstrapActionConfig().withPath(bootstrapScript);

        // The bootstrap action wraps the script part together with its display name.
        return new BootstrapActionConfig().withName(scriptDescription).withScriptBootstrapAction(scriptAction);
    }

    /**
     * Builds the location of the encryption script: the S3 staging location, followed by the path delimiter and the configured encryption script.
     *
     * @return location of the encryption script.
     */
    private String getEncryptionScriptLocation() {
        // When a user requests encryption, an encryption script stored in the DM bucket is used
        // to encrypt all the volumes of all the instances.
        // Amazon plans to support encryption in EMR; once that support is enabled, this script can be
        // removed in favor of the one provided by AWS.
        String stagingLocation = getS3StagingLocation();
        String pathDelimiter = configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER);
        String encryptionScript = configurationHelper.getProperty(ConfigurationValue.EMR_ENCRYPTION_SCRIPT);

        return stagingLocation + pathDelimiter + encryptionScript;
    }

    /**
     * Builds the location of the Oozie installation script: the S3 staging location, followed by the path delimiter and the configured Oozie
     * script.
     *
     * @return location of the Oozie installation script.
     */
    private String getOozieScriptLocation() {
        // Oozie is currently not supported by Amazon as a bootstrapping step, so our own installation
        // script is used to install Oozie on the master node.
        // Once Amazon rolls out Oozie support, this can be removed and AWS Oozie steps can be added instead.
        String stagingLocation = getS3StagingLocation();
        String pathDelimiter = configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER);
        String oozieScript = configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_SCRIPT);

        return stagingLocation + pathDelimiter + oozieScript;
    }

    /**
     * Creates the list of bootstrap action configurations for the given cluster definition. The actions are added in this order: encryption script
     * (only when encryption is enabled), daemon configuration, hadoop configuration, custom actions, and custom master-only actions.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     *
     * @return list of bootstrap action configurations that contains all the bootstrap actions for the given configuration.
     */
    private ArrayList<BootstrapActionConfig> getBootstrapActionConfigList(
            EmrClusterDefinition emrClusterDefinition) {
        ArrayList<BootstrapActionConfig> actionConfigs = new ArrayList<>();

        // The encryption script is only needed when encryption is explicitly enabled.
        if (Boolean.TRUE.equals(emrClusterDefinition.isEncryptionEnabled())) {
            String encryptionScriptLocation = getEncryptionScriptLocation();
            actionConfigs.add(getBootstrapActionConfig(ConfigurationValue.EMR_ENCRYPTION_SCRIPT.getKey(),
                    encryptionScriptLocation));
        }

        // Each helper appends its bootstrap actions, if any, to the list.
        addDaemonBootstrapActionConfig(emrClusterDefinition, actionConfigs);
        addHadoopBootstrapActionConfig(emrClusterDefinition, actionConfigs);
        addCustomBootstrapActionConfig(emrClusterDefinition, actionConfigs);
        addCustomMasterBootstrapActionConfig(emrClusterDefinition, actionConfigs);

        return actionConfigs;
    }

    /**
     * Appends the daemon configuration bootstrap action to the list when the cluster definition has daemon configurations. Each daemon
     * configuration is passed to the script as a "name=value" argument.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     * @param bootstrapActions the list of bootstrap actions to append to.
     */
    private void addDaemonBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Nothing to add without daemon configurations.
        if (CollectionUtils.isEmpty(emrClusterDefinition.getDaemonConfigurations())) {
            return;
        }

        BootstrapActionConfig daemonAction = getBootstrapActionConfig(
                ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(),
                configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON));

        // One "name=value" script argument per daemon configuration.
        ArrayList<String> daemonArgs = new ArrayList<>();
        for (Parameter daemonParameter : emrClusterDefinition.getDaemonConfigurations()) {
            daemonArgs.add(daemonParameter.getName() + "=" + daemonParameter.getValue());
        }

        daemonAction.getScriptBootstrapAction().setArgs(daemonArgs);
        bootstrapActions.add(daemonAction);
    }

    /**
     * Appends the hadoop configuration bootstrap action to the list when the cluster definition has hadoop configurations that yield at least one
     * script argument. Configuration files contribute their file name shortcut and location; key value pairs contribute their shortcut and a
     * "key=value" argument.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     * @param bootstrapActions the list of bootstrap actions to append to.
     */
    private void addHadoopBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Nothing to add without hadoop configurations.
        if (CollectionUtils.isEmpty(emrClusterDefinition.getHadoopConfigurations())) {
            return;
        }

        ArrayList<String> hadoopArgs = new ArrayList<>();
        BootstrapActionConfig hadoopAction = getBootstrapActionConfig(
                ConfigurationValue.EMR_CONFIGURE_HADOOP.getKey(),
                configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_HADOOP));

        for (Object hadoopConfiguration : emrClusterDefinition.getHadoopConfigurations()) {
            // Configuration files: shortcut followed by the file location.
            if (hadoopConfiguration instanceof ConfigurationFiles) {
                ConfigurationFiles configurationFiles = (ConfigurationFiles) hadoopConfiguration;
                for (ConfigurationFile file : configurationFiles.getConfigurationFiles()) {
                    hadoopArgs.add(file.getFileNameShortcut());
                    hadoopArgs.add(file.getConfigFileLocation());
                }
            }

            // Key value pairs: shortcut followed by "key=value".
            if (hadoopConfiguration instanceof KeyValuePairConfigurations) {
                KeyValuePairConfigurations keyValuePairs = (KeyValuePairConfigurations) hadoopConfiguration;
                for (KeyValuePairConfiguration pair : keyValuePairs.getKeyValuePairConfigurations()) {
                    hadoopArgs.add(pair.getKeyValueShortcut());
                    hadoopArgs.add(pair.getAttribKey() + "=" + pair.getAttribVal());
                }
            }
        }

        // The action is only registered when there is at least one argument to pass.
        if (hadoopArgs.isEmpty()) {
            return;
        }

        hadoopAction.getScriptBootstrapAction().setArgs(hadoopArgs);
        bootstrapActions.add(hadoopAction);
    }

    /**
     * Appends one bootstrap action per custom bootstrap script defined for all nodes in the cluster definition. Script arguments are trimmed
     * before they are passed on.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     * @param bootstrapActions the list of bootstrap actions to append to.
     */
    private void addCustomBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Nothing to add without custom bootstrap scripts.
        if (CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionAll())) {
            return;
        }

        for (ScriptDefinition customScript : emrClusterDefinition.getCustomBootstrapActionAll()) {
            BootstrapActionConfig customAction = getBootstrapActionConfig(customScript.getScriptName(),
                    customScript.getScriptLocation());

            // Collect the trimmed script arguments; the list stays empty when there are none.
            ArrayList<String> scriptArgs = new ArrayList<>();
            if (!CollectionUtils.isEmpty(customScript.getScriptArguments())) {
                for (String scriptArgument : customScript.getScriptArguments()) {
                    scriptArgs.add(scriptArgument.trim());
                }
            }

            customAction.getScriptBootstrapAction().setArgs(scriptArgs);
            bootstrapActions.add(customAction);
        }
    }

    /**
     * Appends one bootstrap action per custom master-only bootstrap script in the cluster definition. Each action runs the configured conditional
     * script, passing it the configured node condition, the location of the custom script, and the trimmed script arguments.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     * @param bootstrapActions the list of bootstrap actions to append to.
     */
    private void addCustomMasterBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Nothing to add without master-only custom bootstrap scripts.
        if (CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionMaster())) {
            return;
        }

        for (ScriptDefinition masterScript : emrClusterDefinition.getCustomBootstrapActionMaster()) {
            // The action itself points at the conditional script, not at the custom script.
            BootstrapActionConfig masterAction = getBootstrapActionConfig(masterScript.getScriptName(),
                    configurationHelper.getProperty(ConfigurationValue.EMR_CONDITIONAL_SCRIPT));

            ArrayList<String> scriptArgs = new ArrayList<>();

            // The node condition restricts execution to the master node; the custom script follows it.
            scriptArgs.add(configurationHelper.getProperty(ConfigurationValue.EMR_NODE_CONDITION));
            scriptArgs.add(masterScript.getScriptLocation());

            // Append the trimmed arguments of the custom script, if any.
            if (!CollectionUtils.isEmpty(masterScript.getScriptArguments())) {
                for (String scriptArgument : masterScript.getScriptArguments()) {
                    scriptArgs.add(scriptArgument.trim());
                }
            }

            masterAction.getScriptBootstrapAction().setArgs(scriptArgs);
            bootstrapActions.add(masterAction);
        }
    }

    /**
     * Builds the list of steps to run on the cluster. The steps are added in this order: a step that copies
     * the DM oozie wrapper workflow from S3 to HDFS, a Hive install step when a Hive version is given, a Pig
     * install step when a Pig version is given, an Oozie step when Oozie installation is requested, and finally
     * the hadoop jar steps listed in the cluster definition.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     *
     * @return the step configurations for the given cluster definition.
     */
    private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
        StepFactory stepFactory = new StepFactory();
        List<StepConfig> steps = new ArrayList<>();

        // Jar used to run shell scripts as EMR steps.
        String shellScriptJar = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

        // Step that copies the DM oozie wrapper workflow to HDFS.
        String workflowS3Location = getS3LocationForConfiguration(
                emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());
        String workflowHdfsLocation = configurationHelper
                .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

        List<String> copyScriptArgs = new ArrayList<>();
        // First argument is the copy script itself, located under the workflow S3 location.
        copyScriptArgs.add(workflowS3Location + emrHelper.getS3HdfsCopyScriptName());
        // Followed by: source S3 location, target HDFS location, temp folder to use on the local node.
        copyScriptArgs.add(workflowS3Location);
        copyScriptArgs.add(workflowHdfsLocation);
        copyScriptArgs.add(UUID.randomUUID().toString());

        HadoopJarStepConfig copyWrapperStep = new HadoopJarStepConfig(shellScriptJar).withArgs(copyScriptArgs);
        steps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperStep));

        // Hive install step, only when a version is specified.
        if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
            StepConfig hiveStep = new StepConfig();
            hiveStep.withName("Hive " + emrClusterDefinition.getHiveVersion());
            hiveStep.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
            hiveStep.withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
            steps.add(hiveStep);
        }

        // Pig install step, only when a version is specified.
        if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
            StepConfig pigStep = new StepConfig();
            pigStep.withName("Pig " + emrClusterDefinition.getPigVersion());
            pigStep.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
            pigStep.withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
            steps.add(pigStep);
        }

        // Oozie step, only when the install flag is explicitly true.
        if (Boolean.TRUE.equals(emrClusterDefinition.isInstallOozie())) {
            String oozieTarLocation = getS3StagingLocation()
                    + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                    + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

            List<String> oozieArgs = new ArrayList<>();
            oozieArgs.add(getOozieScriptLocation());
            oozieArgs.add(oozieTarLocation);

            HadoopJarStepConfig oozieStep = new HadoopJarStepConfig(shellScriptJar).withArgs(oozieArgs);
            steps.add(new StepConfig().withName("Oozie").withHadoopJarStep(oozieStep));
        }

        // Hadoop jar steps requested in the cluster definition.
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
            for (HadoopJarStep requestedStep : emrClusterDefinition.getHadoopJarSteps()) {
                steps.add(emrHelper.getEmrHadoopJarStepConfig(requestedStep.getStepName(),
                        requestedStep.getJarLocation(), requestedStep.getMainClass(),
                        requestedStep.getScriptArguments(), requestedStep.isContinueOnError()));
            }
        }

        return steps;
    }

    /**
     * Builds an S3 location by joining the S3 staging location, the configured S3 URL path delimiter and the
     * value of the given configuration key.
     *
     * @param configurationValue the configuration key whose value is appended to the staging location.
     *
     * @return the S3 location for the given configuration key.
     */
    private String getS3LocationForConfiguration(ConfigurationValue configurationValue) {
        String stagingLocation = getS3StagingLocation();
        String pathDelimiter = configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER);
        String relativeLocation = configurationHelper.getProperty(configurationValue);

        return stagingLocation + pathDelimiter + relativeLocation;
    }

    /**
     * Creates the AWS tag list for the EMR nodes from the node tags of the cluster definition. Node tags with
     * a blank name or a blank value are skipped. A cluster definition without node tags yields an empty list.
     *
     * @param emrClusterDefinition the EMR cluster definition.
     *
     * @return the list of tags for the given cluster definition, never {@code null}.
     */
    private List<Tag> getEmrTags(EmrClusterDefinition emrClusterDefinition) {
        List<Tag> tags = new ArrayList<>();

        // Guard against a missing node tag list, in line with how the other lists of the definition are handled.
        if (CollectionUtils.isEmpty(emrClusterDefinition.getNodeTags())) {
            return tags;
        }

        for (NodeTag nodeTag : emrClusterDefinition.getNodeTags()) {
            // Only create an AWS tag when the entry is present and has both a name and a value.
            if (nodeTag != null && StringUtils.isNotBlank(nodeTag.getTagName())
                    && StringUtils.isNotBlank(nodeTag.getTagValue())) {
                tags.add(new Tag(nodeTag.getTagName(), nodeTag.getTagValue()));
            }
        }

        return tags;
    }

    /**
     * Creates the run job flow request for the given cluster name and cluster definition. Optional settings
     * (release label, applications, configurations, log URI, visibility, AMI version, additional info,
     * bootstrap actions, steps, tags, supported product) are only set when the definition provides them. The
     * job flow role and the service role fall back to the configured defaults when the definition leaves them
     * blank.
     *
     * @param clusterName the EMR cluster name.
     * @param emrClusterDefinition the EMR cluster definition.
     *
     * @return the run job flow request for the given configuration.
     */
    private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) {
        // Create the object
        RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName,
                getJobFlowInstancesConfig(emrClusterDefinition));

        // Set release label
        if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) {
            runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel());
        }

        // Set list of Applications
        List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition
                .getApplications();
        if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) {
            runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications));
        }

        // Set list of Configurations
        List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition
                .getConfigurations();
        if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) {
            runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations));
        }

        // Set the log bucket if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) {
            runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket());
        }

        // Set the visible to all flag
        if (emrClusterDefinition.isVisibleToAll() != null) {
            runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll());
        }

        // Set the IAM profile for the nodes
        if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) {
            runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName());
        } else {
            runJobFlowRequest.setJobFlowRole(dmStringHelper
                    .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME));
        }

        // Set the IAM profile for the service
        if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) {
            runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole());
        } else {
            runJobFlowRequest.setServiceRole(dmStringHelper
                    .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME));
        }

        // Set the AMI version if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) {
            runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion());
        }

        // Set the additionalInfo if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) {
            runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo());
        }

        // Set the bootstrap actions. The list is built once so the checked list is the one that is set.
        List<BootstrapActionConfig> bootstrapActions = getBootstrapActionConfigList(emrClusterDefinition);
        if (!bootstrapActions.isEmpty()) {
            runJobFlowRequest.setBootstrapActions(bootstrapActions);
        }

        // Set the app installation steps. Built once: the step list contains a randomly generated temp folder
        // name, so building it twice would check one list and set a different one.
        List<StepConfig> steps = getStepConfig(emrClusterDefinition);
        if (!steps.isEmpty()) {
            runJobFlowRequest.setSteps(steps);
        }

        // Set the tags. Built once for the same reason as above.
        List<Tag> tags = getEmrTags(emrClusterDefinition);
        if (!tags.isEmpty()) {
            runJobFlowRequest.setTags(tags);
        }

        // Assign supported products as applicable
        if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) {
            List<String> supportedProducts = new ArrayList<>();
            supportedProducts.add(emrClusterDefinition.getSupportedProduct());
            runJobFlowRequest.setSupportedProducts(supportedProducts);
        }

        // Return the object
        return runJobFlowRequest;
    }

    /**
     * Converts the given list of {@link EmrClusterDefinitionApplication} into a list of {@link Application}.
     * Name, version and args are copied as-is; the additional info is only set when the source application
     * carries a non-empty additional info list.
     *
     * @param emrClusterDefinitionApplications list of {@link EmrClusterDefinitionApplication}
     * @return list of {@link Application}, in the same order as the input
     */
    public List<Application> getApplications(
            List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications) {
        List<Application> converted = new ArrayList<>();

        for (EmrClusterDefinitionApplication source : emrClusterDefinitionApplications) {
            Application target = new Application();
            target.setName(source.getName());
            target.setVersion(source.getVersion());
            target.setArgs(source.getArgs());

            // Additional info is optional; leave it unset when nothing was provided.
            List<Parameter> additionalInfo = source.getAdditionalInfoList();
            if (!CollectionUtils.isEmpty(additionalInfo)) {
                target.setAdditionalInfo(getMap(additionalInfo));
            }

            converted.add(target);
        }

        return converted;
    }

    /**
     * Converts the given list of {@link Parameter} into a {@link Map} keyed by parameter name with the
     * parameter value as the map value. When several parameters share a name, the last one wins.
     *
     * @param parameters list of {@link Parameter}
     * @return {@link Map} of parameter names to parameter values
     */
    public Map<String, String> getMap(List<Parameter> parameters) {
        Map<String, String> valuesByName = new HashMap<>();
        for (Parameter entry : parameters) {
            String name = entry.getName();
            String value = entry.getValue();
            valuesByName.put(name, value);
        }
        return valuesByName;
    }

    /**
     * Converts the given list of {@link EmrClusterDefinitionConfiguration} into a list of {@link Configuration}.
     * The classification is always copied; nested configurations and properties are only set when the source
     * provides a non-empty list for them.
     *
     * @param emrClusterDefinitionConfigurations list of {@link EmrClusterDefinitionConfiguration}
     * @return list of {@link Configuration}, in the same order as the input
     */
    public List<Configuration> getConfigurations(
            List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations) {
        List<Configuration> converted = new ArrayList<>();

        for (EmrClusterDefinitionConfiguration source : emrClusterDefinitionConfigurations) {
            Configuration target = new Configuration();
            target.setClassification(source.getClassification());

            // Nested configurations are converted by recursing into this method.
            List<EmrClusterDefinitionConfiguration> nested = source.getConfigurations();
            if (!CollectionUtils.isEmpty(nested)) {
                target.setConfigurations(getConfigurations(nested));
            }

            List<Parameter> sourceProperties = source.getProperties();
            if (!CollectionUtils.isEmpty(sourceProperties)) {
                target.setProperties(getMap(sourceProperties));
            }

            converted.add(target);
        }

        return converted;
    }
}