Example usage for the com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig constructor

List of usage examples for com.amazonaws.services.elasticmapreduce.model JobFlowInstancesConfig JobFlowInstancesConfig

Introduction

This page collects usage examples for the JobFlowInstancesConfig constructor of the class com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig.

Prototype

JobFlowInstancesConfig

Source Link

Usage

From source file:awswc.AwsConsoleApp.java

License:Open Source License

/**
 * Submits an EMR job flow consisting of two Hadoop jar steps ("wordcount" followed by
 * "maxwordcount") and prints the id of the job flow that was started.
 *
 * <p>The second argument of the first step ({@code BUCKET_NAME + "outWC"}) is passed as the
 * first argument of the second step. No bootstrap actions are attached to the request.
 *
 * @throws InterruptedException declared by the signature; nothing in this body throws it directly
 */
static void runJobFlow() throws InterruptedException {
    // Master and slave nodes share the same instance type.
    final String nodeType = InstanceType.M24xlarge.toString();
    final JobFlowInstancesConfig clusterNodes = new JobFlowInstancesConfig()
            .withHadoopVersion(HADOOP_VERSION)
            .withEc2KeyName("ayuda-vp1")
            .withInstanceCount(MASTER_INSTANCE_COUNT)
            .withMasterInstanceType(nodeType)
            .withSlaveInstanceType(nodeType);

    // Step 1: word count jar.
    final HadoopJarStepConfig wordCountJar = new HadoopJarStepConfig()
            .withJar(S3N_WORD_COUNT_JAR_)
            .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC");
    final StepConfig wordCountStep = new StepConfig()
            .withName("wordcount")
            .withHadoopJarStep(wordCountJar)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    // Step 2: max word count jar, fed with the location passed to step 1 as its second argument.
    final HadoopJarStepConfig maxWordCountJar = new HadoopJarStepConfig()
            .withJar(S3N_MAX_WORD_COUNT_JAR)
            .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10");
    final StepConfig maxWordCountStep = new StepConfig()
            .withName("maxwordcount")
            .withHadoopJarStep(maxWordCountJar)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    final RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(FLOW_NAME)
            .withInstances(clusterNodes)
            .withSteps(wordCountStep, maxWordCountStep)
            .withLogUri(BUCKET_NAME + "debug");

    final String jobFlowId = emr.runJobFlow(request).getJobFlowId();
    System.out.println("Ran job flow with id: " + jobFlowId);
}

From source file:com.aegeus.aws.ElasticMapReduceService.java

License:Apache License

/**
 * Creates a new EMR cluster with release label {@code emr-4.1.0} and stores the id of the
 * resulting job flow in {@code clusterId}.
 *
 * <p>The cluster is placed in the configured subnet when a subnet id is set; otherwise the
 * configured placement is used. The EC2 key pair and the log URI are applied only when they are
 * configured. An Elasticsearch install script is registered as a bootstrap action.
 *
 * <p>NOTE(review): earlier documentation said "Hadoop 2.4.0"; the code only pins the EMR release
 * label, so the actual Hadoop version is whatever that release ships - confirm.
 */
public void createCluster() {
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withInstanceCount((int) config.getInstanceCount())
            .withMasterInstanceType(config.getMasterType())
            .withSlaveInstanceType(config.getSlaveType());

    // Attach the EC2 key pair only when one is configured. The previous condition was inverted
    // and applied the key name only when it was null or empty.
    if (!Strings.isNullOrEmpty(config.getKeyName())) {
        instances.setEc2KeyName(config.getKeyName());
    }

    // A subnet id takes precedence over an explicit placement.
    if (!Strings.isNullOrEmpty(config.getSubnetId())) {
        instances.setEc2SubnetId(config.getSubnetId());
    } else {
        instances.setPlacement(new PlacementType(config.getPlace()));
    }

    ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig()
            .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb");

    BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig);

    RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0")
            .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role")
            .withBootstrapActions(installEs).withInstances(instances);

    if (!Strings.isNullOrEmpty(config.getLogBucket())) {
        request.setLogUri(config.getLogBucket());
    }

    RunJobFlowResult result = emr.runJobFlow(request);

    clusterId = result.getJobFlowId();
}

From source file:com.clouddrive.parth.NewClass.java

/**
 * Builds the instance configuration used for the job flow: Hadoop version, instance count, and
 * a single instance type shared by master and slave nodes. Keep-alive and the EC2 key name are
 * left unset.
 *
 * @return the populated instance configuration
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
private static JobFlowInstancesConfig configInstance() throws Exception {
    return new JobFlowInstancesConfig()
            .withHadoopVersion(HADOOP_VERSION)
            .withInstanceCount(INSTANCE_COUNT)
            .withMasterInstanceType(INSTANCE_TYPE)
            .withSlaveInstanceType(INSTANCE_TYPE);
}

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

/**
 * Starts a keep-alive job flow for this cluster and records its id in {@code _jobFlowId}.
 *
 * <p>The cluster state is set to {@code STARTING} on entry and ends up as {@code CONNECTED} on
 * success or {@code UNCONNECTED} (with {@code _jobFlowId} cleared) on any failure.
 *
 * @throws NullPointerException  if no private key name is configured
 * @throws IllegalStateException if a job flow with this cluster's name is already running
 * @throws InterruptedException  declared by the signature; presumably raised while waiting for
 *                               the cluster to start - confirm against waitUntilClusterStarted
 */
public synchronized void startup() throws InterruptedException {
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean started = false;
    try {
        final EmrSettings emrSettings = getSettings();
        if (emrSettings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }

        // NOTE(review): Hadoop version, additional info and bootstrap actions are read from the
        // _settings field, the rest from getSettings() - confirm both refer to the same object.
        final JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
                .withHadoopVersion(_settings.getHadoopVersion())
                .withMasterInstanceType(emrSettings.getMasterInstanceType().getId())
                .withSlaveInstanceType(emrSettings.getNodeInstanceType().getId())
                .withInstanceCount(emrSettings.getInstanceCount())
                .withEc2KeyName(emrSettings.getPrivateKeyName())
                .withPlacement(new PlacementType())
                .withKeepJobFlowAliveWhenNoSteps(true);

        final RunJobFlowRequest request = new RunJobFlowRequest()
                .withLogUri("s3n://" + emrSettings.getS3Bucket() + emrSettings.getS3LogPath())
                .withInstances(instances)
                .withName(getName())
                .withAdditionalInfo(_settings.getAdditionalStartInfo());
        request.setBootstrapActions(_settings.getBootstrapActions());
        if (emrSettings.isDebugEnabled()) {
            request.withSteps(DEBUG_STEP);
        }

        _jobFlowId = _emrWebService.runJobFlow(request).getJobFlowId();
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        started = true;
    } finally {
        if (!started) {
            // Roll back to a clean, unconnected state on any failure.
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        } else {
            _clusterState = ClusterState.CONNECTED;
        }
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java

License:LGPL

/**
 * Validates the job parameters and builds the {@code RunJobFlowRequest} stored in
 * {@code runFlowRequest}.
 *
 * <p>The request always contains the main Hadoop jar step. An "Enable Debugging" step is put in
 * front of it when {@code enableDebugging} is set, a "Configure hadoop" bootstrap action is added
 * when {@code taskTrackerMaxMapTasks} is positive, and the log URI and EC2 key name are applied
 * only when configured. When no master instance type is set, the slave instance type is used.
 *
 * @throws NullPointerException     if a required field is {@code null}
 * @throws IllegalArgumentException if fewer than one instance is requested
 */
void init() {

    // NOTE(review): the original checked AWSAccessKey twice in a row. The second check was
    // probably meant for the secret key - confirm and add that validation.
    requireNonNull(this.AWSAccessKey, "AWSAccessKey is null");
    requireNonNull(this.jarLocation, "jarLocation is null");
    requireNonNull(this.jarArguments, "jarArguments is null");
    requireNonNull(this.slavesInstanceType, "slavesInstanceType is null");
    requireNonNull(this.hadoopVersion, "hadoopVersion is null");
    requireNonNull(this.jobFlowName, "jobFlowName is null");

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instance is lower than 1");
    }

    // Default the master instance type to the slave instance type.
    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instance
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        instances.setEc2KeyName(this.ec2KeyName);
    }

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName).withInstances(instances);

    // Enable or not debugging; the debugging step is built only when it is used.
    if (this.enableDebugging) {
        final StepConfig enableDebuggingStep = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
        this.runFlowRequest.withSteps(enableDebuggingStep, stepConfig);
    } else {
        this.runFlowRequest.withSteps(stepConfig);
    }

    // Limit the number of task in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
                .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
                .withArgs("--site-key-value",
                        "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

        final BootstrapActionConfig configureHadoop = new BootstrapActionConfig().withName("Configure hadoop")
                .withScriptBootstrapAction(scriptBootstrapAction);

        this.runFlowRequest.withBootstrapActions(configureHadoop);
    }

    if (this.logPathname != null && !this.logPathname.isEmpty()) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }
}

From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java

License:Apache License

/**
 * Parses the command line and launches an EMR cluster named "Spark Cluster".
 *
 * <p>Depending on the parsed flags this method also uploads the uber jar to S3 ({@code upload}),
 * adds an "Enable Debugging" step ({@code debug}) and adds a {@code spark-submit} step
 * ({@code execute}). When {@code keepAlive} is set, a failing Spark step continues instead of
 * terminating the job flow, and the cluster is kept alive when it has no steps.
 *
 * @param args command-line arguments handed to JCommander
 * @throws ParameterException if the arguments cannot be parsed; usage is printed first
 */
public void entryPoint(String[] args) {
    JCommander jcmdr = new JCommander(this);
    try {
        jcmdr.parse(args);
    } catch (ParameterException e) {
        jcmdr.usage();
        try {
            // Short pause before rethrowing - presumably lets the usage output flush; TODO confirm.
            Thread.sleep(500);
        } catch (InterruptedException interrupted) {
            // Restore the interrupt status instead of silently swallowing it.
            Thread.currentThread().interrupt();
        }
        throw e;
    }

    AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard();
    builder.withRegion(region);
    builder.withCredentials(getCredentialsProvider());

    AmazonElasticMapReduce emr = builder.build();

    List<StepConfig> steps = new ArrayList<>();

    if (upload) {
        log.info("uploading uber jar");

        AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard();
        s3builder.withRegion(region);
        s3builder.withCredentials(getCredentialsProvider());
        AmazonS3 s3Client = s3builder.build();

        // Create the target bucket on first use.
        if (!s3Client.doesBucketExist(bucketName)) {
            s3Client.createBucket(bucketName);
        }

        File uberJarFile = new File(uberJar);

        // The object key is the jar's file name.
        s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile));
    }

    if (debug) {
        log.info("enable debug");

        StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce");
        StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
        steps.add(enableDebugging);
    }

    if (execute) {
        log.info("execute spark step");

        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig();
        sparkStepConf.withJar("command-runner.jar");
        sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className,
                getS3UberJarUrl(), "-useSparkLocal", "false");

        // With keepAlive set, a failed step must not terminate the job flow.
        ActionOnFailure action = keepAlive ? ActionOnFailure.CONTINUE : ActionOnFailure.TERMINATE_JOB_FLOW;

        StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action)
                .withHadoopJarStep(sparkStepConf);
        steps.add(sparkStep);
    }

    log.info("create spark cluster");

    Application sparkApp = new Application().withName("Spark");

    // service and job flow role will be created automatically when
    // launching cluster in aws console, better do that first or create
    // manually

    RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps)
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl())
            .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount)
                    .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType)
                    .withSlaveInstanceType(instanceType));

    RunJobFlowResult result = emr.runJobFlow(request);

    log.info(result.toString());

    log.info("done");
}

From source file:org.finra.dm.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Builds the job flow instances configuration from the given EMR cluster definition.
 *
 * <p>The configured DM support security group, the SSH key pair, the subnet id, the keep-alive
 * flag, the termination protection flag and the Hadoop version are each applied only when
 * present. The instance groups are always set.
 *
 * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters.
 *
 * @return the job flow instance configuration.
 */
private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) {
    final JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig();

    // Optional extra security group for the master node, taken from the configuration.
    final String supportSecurityGroup = configurationHelper
            .getProperty(ConfigurationValue.EMR_DM_SUPPORT_SECURITY_GROUP);
    if (StringUtils.isNotBlank(supportSecurityGroup)) {
        final List<String> masterSecurityGroups = new ArrayList<>();
        masterSecurityGroups.add(supportSecurityGroup);
        instancesConfig.setAdditionalMasterSecurityGroups(masterSecurityGroups);
    }

    // Optional SSH key pair.
    final String sshKeyPairName = emrClusterDefinition.getSshKeyPairName();
    if (StringUtils.isNotBlank(sshKeyPairName)) {
        instancesConfig.setEc2KeyName(sshKeyPairName);
    }

    // Optional subnet id.
    final String subnetId = emrClusterDefinition.getSubnetId();
    if (StringUtils.isNotBlank(subnetId)) {
        instancesConfig.setEc2SubnetId(subnetId);
    }

    // Instance groups are always set.
    instancesConfig.setInstanceGroups(getInstanceGroupConfig(emrClusterDefinition));

    // Optional keep-alive flag.
    final Boolean keepAlive = emrClusterDefinition.isKeepAlive();
    if (keepAlive != null) {
        instancesConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
    }

    // Optional termination protection flag.
    final Boolean terminationProtection = emrClusterDefinition.isTerminationProtection();
    if (terminationProtection != null) {
        instancesConfig.setTerminationProtected(terminationProtection);
    }

    // Optional Hadoop version.
    final String hadoopVersion = emrClusterDefinition.getHadoopVersion();
    if (StringUtils.isNotBlank(hadoopVersion)) {
        instancesConfig.setHadoopVersion(hadoopVersion);
    }

    return instancesConfig;
}

From source file:org.finra.herd.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Builds the job flow instances configuration (security groups, key pair, instance groups and
 * fleets, subnet, and optional flags) from the given EMR cluster definition.
 *
 * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters
 *
 * @return the job flow instance configuration
 */
private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) {
    final JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig();

    // EMR-managed and additional security groups for master and slave nodes.
    instancesConfig.setEmrManagedMasterSecurityGroup(emrClusterDefinition.getMasterSecurityGroup());
    instancesConfig.setEmrManagedSlaveSecurityGroup(emrClusterDefinition.getSlaveSecurityGroup());
    instancesConfig.setAdditionalMasterSecurityGroups(emrClusterDefinition.getAdditionalMasterSecurityGroups());
    instancesConfig.setAdditionalSlaveSecurityGroups(emrClusterDefinition.getAdditionalSlaveSecurityGroups());

    // Optional SSH key pair.
    final String sshKeyPairName = emrClusterDefinition.getSshKeyPairName();
    if (StringUtils.isNotBlank(sshKeyPairName)) {
        instancesConfig.setEc2KeyName(sshKeyPairName);
    }

    // Instance groups and instance fleets are always set.
    instancesConfig.setInstanceGroups(getInstanceGroupConfigs(emrClusterDefinition.getInstanceDefinitions()));
    instancesConfig.setInstanceFleets(getInstanceFleets(emrClusterDefinition.getInstanceFleets()));

    // Optional subnet: with instance fleets the value is split on commas into a list of subnet
    // ids; otherwise it is passed through as a single EC2 subnet id.
    final String subnetId = emrClusterDefinition.getSubnetId();
    if (StringUtils.isNotBlank(subnetId)) {
        final boolean usesInstanceFleets = CollectionUtils.isNotEmpty(instancesConfig.getInstanceFleets());
        if (usesInstanceFleets) {
            instancesConfig.setEc2SubnetIds(herdStringHelper.splitAndTrim(subnetId, ","));
        } else {
            instancesConfig.setEc2SubnetId(subnetId);
        }
    }

    // Optional keep-alive flag.
    final Boolean keepAlive = emrClusterDefinition.isKeepAlive();
    if (keepAlive != null) {
        instancesConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
    }

    // Optional termination protection flag.
    final Boolean terminationProtection = emrClusterDefinition.isTerminationProtection();
    if (terminationProtection != null) {
        instancesConfig.setTerminationProtected(terminationProtection);
    }

    // Optional Hadoop version.
    final String hadoopVersion = emrClusterDefinition.getHadoopVersion();
    if (StringUtils.isNotBlank(hadoopVersion)) {
        instancesConfig.setHadoopVersion(hadoopVersion);
    }

    return instancesConfig;
}

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java

License:Apache License

/**
 * Builds a keep-alive instances configuration from {@code emrProperties}. Key pair, Hadoop
 * version and availability zone are applied only when non-empty; the slave instance type falls
 * back to the master instance type when it is empty.
 *
 * @return JobFlowInstancesConfig
 */
private JobFlowInstancesConfig setupJobFlowInstancesConfig() {
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setKeepJobFlowAliveWhenNoSteps(true);
    instances.setInstanceCount(emrProperties.getInstanceCount());
    instances.setMasterInstanceType(emrProperties.getMasterInstanceType());

    if (!isEmpty(emrProperties.getKeyPairName())) {
        instances.withEc2KeyName(emrProperties.getKeyPairName());
    }

    if (!isEmpty(emrProperties.getHadoopVersion())) {
        instances.withHadoopVersion(emrProperties.getHadoopVersion());
    }

    if (!isEmpty(emrProperties.getAvailabilityZone())) {
        final PlacementType placement = new PlacementType();
        placement.setAvailabilityZone(emrProperties.getAvailabilityZone());
        instances.withPlacement(placement);
    }

    // Slave nodes reuse the master instance type unless a slave type is configured.
    final String slaveType = isEmpty(emrProperties.getSlaveInstanceType())
            ? emrProperties.getMasterInstanceType()
            : emrProperties.getSlaveInstanceType();
    instances.withSlaveInstanceType(slaveType);

    return instances;
}

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License:Apache License

/**
 * Builds the instances configuration for a job flow.
 *
 * @param numInsts           number of instances to request
 * @param masterInstanceType instance type for the master node
 * @param slaveInstanceType  instance type for the slave nodes
 * @return the configuration, with the keep-alive flag taken from the {@code alive} field
 */
private JobFlowInstancesConfig initEC2Instance(Integer numInsts, String masterInstanceType,
        String slaveInstanceType) {
    return new JobFlowInstancesConfig()
            .withInstanceCount(numInsts)
            .withMasterInstanceType(masterInstanceType)
            .withSlaveInstanceType(slaveInstanceType)
            .withKeepJobFlowAliveWhenNoSteps(alive);
}