Example usage for org.apache.hadoop.yarn.api.records ApplicationSubmissionContext setApplicationType

List of usage examples for org.apache.hadoop.yarn.api.records ApplicationSubmissionContext setApplicationType

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records ApplicationSubmissionContext setApplicationType.

Prototype

@Public
@Stable
public abstract void setApplicationType(String applicationType);

Source Link

Document

Set the application type

Usage

From source file:com.cloudera.llama.am.yarn.YarnRMConnector.java

License:Apache License

private ApplicationId _createApp(YarnClient rmClient, String queue) throws LlamaException {
    try {/*from   w w  w .  java2s.  co m*/
        // Create application
        YarnClientApplication newApp = rmClient.createApplication();
        ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();

        // Create launch context for app master
        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

        // set the application id
        appContext.setApplicationId(appId);

        // set the application name
        appContext.setApplicationName("Llama for " + queue);

        appContext.setApplicationType(appType);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        int priority = getConf().getInt(AM_PRIORITY_KEY, AM_PRIORITY_DEFAULT);
        pri.setPriority(priority);
        appContext.setPriority(pri);

        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(queue);

        // Set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        appContext.setAMContainerSpec(amContainer);

        // unmanaged AM
        appContext.setUnmanagedAM(true);

        // setting max attempts to 1 to avoid warning from Yarn RM
        // as the AM is unmanaged, it doesn't really matter.
        appContext.setMaxAppAttempts(1);

        // Submit the application to the applications manager
        return rmClient.submitApplication(appContext);
    } catch (Exception ex) {
        throw new LlamaException(ex, ErrorCode.AM_CANNOT_CREATE, queue);
    }
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

/**
 * Launch application for the dag represented by this client.
 *
 * @throws YarnException//from   w w  w . j  av  a2 s  .c o  m
 * @throws IOException
 */
public void startApplication() throws YarnException, IOException {
    Class<?>[] defaultClasses;

    if (applicationType.equals(YARN_APPLICATION_TYPE)) {
        //TODO restrict the security check to only check if security is enabled for webservices.
        if (UserGroupInformation.isSecurityEnabled()) {
            defaultClasses = DATATORRENT_SECURITY_CLASSES;
        } else {
            defaultClasses = DATATORRENT_CLASSES;
        }
    } else {
        throw new IllegalStateException(applicationType + " is not a valid application type.");
    }

    LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);

    if (resources != null) {
        localJarFiles.addAll(resources);
    }

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq);
    //LOG.info("Got Cluster node info from ASM");
    //for (NodeReport node : clusterNodesResp.getNodeReports()) {
    //  LOG.info("Got node report from ASM for"
    //           + ", nodeId=" + node.getNodeId()
    //           + ", nodeAddress" + node.getHttpAddress()
    //           + ", nodeRackName" + node.getRackName()
    //           + ", nodeNumContainers" + node.getNumContainers()
    //           + ", nodeHealthStatus" + node.getHealthReport());
    //}
    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication newApp = yarnClient.createApplication();
    appId = newApp.getNewApplicationResponse().getApplicationId();

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
    int amMemory = dag.getMasterMemoryMB();
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
        dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
    }

    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
    appContext.setApplicationType(this.applicationType);
    if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
        //appContext.setMaxAppAttempts(1); // no retries until Stram is HA
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Setup security tokens
    // If security is enabled get ResourceManager and NameNode delegation tokens.
    // Set these tokens on the container so that they are sent as part of application submission.
    // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken
    // is also used by ResourceManager to fetch the jars from HDFS and set them up for the
    // application master launch.
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + fs.getUri() + "; " + token);
                }
            }
        } finally {
            fs.close();
        }

        addRMDelegationToken(tokenRenewer, credentials);

        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    // copy required jar files to dfs, to be localized for containers
    FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
    try {
        Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
        Path appPath = new Path(appsBasePath, appId.toString());

        String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));

        LOG.info("libjars: {}", libJarsCsv);
        dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv);
        LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources,
                fs);

        if (archives != null) {
            String[] localFiles = archives.split(",");
            String archivesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("archives: {}", archivesCsv);
            dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv,
                    localResources, fs);
        }

        if (files != null) {
            String[] localFiles = files.split(",");
            String filesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("files: {}", filesCsv);
            dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources,
                    fs);
        }

        dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
        if (dag.getAttributes()
                .get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */
            Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
            // use conf client side to pickup any proxy settings from dt-site.xml
            dag.setAttribute(OperatorContext.STORAGE_AGENT,
                    new FSStorageAgent(checkpointPath.toString(), conf));
        }
        if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
            dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
        }

        // Set the log4j properties if needed
        if (!log4jPropFile.isEmpty()) {
            Path log4jSrc = new Path(log4jPropFile);
            Path log4jDst = new Path(appPath, "log4j.props");
            fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
            FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
            LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
            log4jRsrc.setType(LocalResourceType.FILE);
            log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
            log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
            log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
            log4jRsrc.setSize(log4jFileStatus.getLen());
            localResources.put("log4j.properties", log4jRsrc);
        }

        if (originalAppId != null) {
            Path origAppPath = new Path(appsBasePath, this.originalAppId);
            LOG.info("Restart from {}", origAppPath);
            copyInitialState(origAppPath);
        }

        // push logical plan to DFS location
        Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
        FSDataOutputStream outStream = fs.create(cfgDst, true);
        LogicalPlan.write(this.dag, outStream);
        outStream.close();

        Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
        outStream = fs.create(launchConfigDst, true);
        conf.writeXml(outStream);
        outStream.close();

        FileStatus topologyFileStatus = fs.getFileStatus(cfgDst);
        LocalResource topologyRsrc = Records.newRecord(LocalResource.class);
        topologyRsrc.setType(LocalResourceType.FILE);
        topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri()));
        topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime());
        topologyRsrc.setSize(topologyFileStatus.getLen());
        localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc);

        // Set local resource info into app master container launch context
        amContainer.setLocalResources(localResources);

        // Set the necessary security tokens as needed
        //amContainer.setContainerTokens(containerToken);
        // Set the env variables to be setup in the env where the application master will be run
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<String, String>();

        // Add application jar(s) location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar(s)
        // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
        //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
        StringBuilder classPathEnv = new StringBuilder("./*");
        String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
        for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH
                : classpath.split(",")) {
            if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
                // SPOI-2501
                continue;
            }
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }
        env.put("CLASSPATH", classPathEnv.toString());
        // propagate to replace node managers user name (effective in non-secure mode)
        env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());

        amContainer.setEnvironment(env);

        // Set the necessary command to execute the application master
        ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30);

        // Set java executable command
        LOG.info("Setting up app master command");
        vargs.add(javaCmd);
        if (dag.isDebug()) {
            vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
        }
        // Set Xmx based on am memory size
        // default heap size 75% of total memory
        if (dag.getMasterJVMOptions() != null) {
            vargs.add(dag.getMasterJVMOptions());
        }
        vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
        vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
        vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin");
        vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
        vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
        if (dag.isDebug()) {
            vargs.add("-Dlog4j.debug=true");
        }

        String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
        if (loggersLevel != null) {
            vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel));
        }
        vargs.add(StreamingAppMaster.class.getName());
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

        // Get final command
        StringBuilder command = new StringBuilder(9 * vargs.size());
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        LOG.info("Completed setting up app master command " + command.toString());
        List<String> commands = new ArrayList<String>();
        commands.add(command.toString());
        amContainer.setCommands(commands);

        // Set up resource type requirements
        // For now, only memory is supported so we set memory requirements
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(amMemory);
        appContext.setResource(capability);

        // Service data is a binary blob that can be passed to the application
        // Not needed in this scenario
        // amContainer.setServiceData(serviceData);
        appContext.setAMContainerSpec(amContainer);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(amPriority);
        appContext.setPriority(pri);
        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(queueName);

        // Submit the application to the applications manager
        // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
        // Ignore the response as either a valid response object is returned on success
        // or an exception thrown to denote some form of a failure
        String specStr = Objects.toStringHelper("Submitting application: ")
                .add("name", appContext.getApplicationName()).add("queue", appContext.getQueue())
                .add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource())
                .toString();
        LOG.info(specStr);
        if (dag.isDebug()) {
            //LOG.info("Full submission context: " + appContext);
        }
        yarnClient.submitApplication(appContext);
    } finally {
        fs.close();
    }
}

From source file:de.huberlin.wbi.hiway.common.Client.java

License:Apache License

/**
 * Main run function for the client./*  w w w.  j  a v a  2  s . co m*/
 * 
 * @return true if application completed successfully.
 */
private boolean run() throws IOException, YarnException {

    /* log */ System.out.println("Running Client");

    yarnClient.start();
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();

    /* log */ System.out.println(
            "Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    /* log */ System.out.println("Got Cluster node info from ASM");
    /* log */ for (NodeReport node : clusterNodeReports)
        System.out.println("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    /* log */ System.out.println("Queue info" + ", queueName=" + queueInfo.getQueueName()
            + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity="
            + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size()
            + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    /* log */ for (QueueUserACLInfo aclInfo : listAclInfo)
        for (QueueACL userAcl : aclInfo.getUserAcls())
            System.out.println("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName()
                    + ", userAcl=" + userAcl.name());

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    // Get min/max resource capabilities from RM and change memory ask if needed
    int maxVC = appResponse.getMaximumResourceCapability().getVirtualCores();
    /* log */ System.out.println("Max vCores capabililty of resources in this cluster " + maxVC);
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    /* log */ System.out.println("Max mem capabililty of resources in this cluster " + maxMem);
    // A resource ask cannot exceed the max.
    if (amVCores > maxVC) {
        /* log */ System.out.println("AM vCores specified above max threshold of cluster. Using max value."
                + ", specified=" + amVCores + ", max=" + maxVC);
        amVCores = maxVC;
    }
    if (amMemory > maxMem) {
        /* log */ System.out.println("AM memory specified above max threshold of cluster. Using max value."
                + ", specified=" + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationType(conf.get(HiWayConfiguration.HIWAY_AM_APPLICATION_TYPE,
            HiWayConfiguration.HIWAY_AM_APPLICATION_TYPE_DEFAULT));
    appContext.setApplicationName("run " + workflowParam + " (type: " + workflowType.toString() + ")");
    ApplicationId appId = appContext.getApplicationId();
    String hdfsBaseDirectoryName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE_DEFAULT);
    String hdfsSandboxDirectoryName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE_DEFAULT);
    Path hdfsBaseDirectory = new Path(new Path(hdfs.getUri()), hdfsBaseDirectoryName);
    Data.setHdfsBaseDirectory(hdfsBaseDirectory);
    Path hdfsSandboxDirectory = new Path(hdfsBaseDirectory, hdfsSandboxDirectoryName);
    Path hdfsApplicationDirectory = new Path(hdfsSandboxDirectory, appId.toString());
    Data.setHdfsApplicationDirectory(hdfsApplicationDirectory);
    Data.setHdfs(hdfs);

    Path wfSource, wfDest, wfTemp = null;
    try {
        wfSource = new Path(new URI(workflowParam).getPath());
    } catch (URISyntaxException e) {
        wfSource = new Path(workflowParam);
    }
    wfDest = new Path(hdfsApplicationDirectory + "/" + wfSource.getName());

    // (1) if workflow file in hdfs, then transfer to temp file in local fs
    if (hdfs.exists(wfSource)) {
        wfTemp = new Path("./." + wfSource.getName());
        System.out.println("Workflow found in HDFS at location " + wfSource);
        hdfs.copyToLocalFile(false, wfSource, wfTemp);
    }

    // (2) if galaxy workflow, then copy and replace input ports
    if (workflowType.equals(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_OPTS.galaxy)) {
        wfTemp = preProcessGalaxyWorkflow(wfSource, wfTemp);
    }

    if (wfTemp != null) {
        hdfs.copyFromLocalFile(wfTemp, wfDest);
        new File(wfTemp.toString()).delete();
    } else {
        hdfs.copyFromLocalFile(wfSource, wfDest);
    }

    if (summaryPath != null)
        summary = new Data(summaryPath);
    if (customMemPath != null)
        (new Data(customMemPath)).stageOut();

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    /* set the env variables to be setup in the env where the application master will be run */
    System.out.println("Set the environment for the application master");
    Map<String, String> env = new HashMap<>();

    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(':');
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }

    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<>(30);

    // Set java executable command
    System.out.println("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    if (HiWayConfiguration.debug)
        vargs.add(
                "-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=9010 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    vargs.add("-Xss" + "16m");
    // Set class name

    switch (workflowType) {
    case dax:
        vargs.add(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_DAX_AM_CLASS);
        break;
    case log:
        vargs.add(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_LOG_AM_CLASS);
        break;
    case galaxy:
        vargs.add(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_GALAXY_AM_CLASS);
        break;
    case cuneiformE:
        vargs.add(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_CUNEIFORME_AM_CLASS);
        break;
    default:
        vargs.add(HiWayConfiguration.HIWAY_WORKFLOW_LANGUAGE_CUNEIFORMJ_AM_CLASS);
    }

    vargs.add("--scheduler " + schedulerName.toString());
    if (memory != null)
        vargs.add("--memory " + memory);
    if (summary != null)
        vargs.add("--summary " + summary.getName());
    if (customMemPath != null)
        vargs.add("--custom " + customMemPath);
    vargs.add("--appid " + appId.toString());
    if (HiWayConfiguration.debug)
        vargs.add("--debug");
    if (HiWayConfiguration.verbose)
        vargs.add("--verbose");
    vargs.add(workflowParam);
    vargs.add("> >(tee AppMaster.stdout " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout)");
    vargs.add("2> >(tee AppMaster.stderr " + ApplicationConstants.LOG_DIR_EXPANSION_VAR
            + "/AppMaster.stderr >&2)");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    System.out.println("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setVirtualCores(amVCores);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = hdfs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                System.out.println("Got dt for " + hdfs.getUri() + "; " + token);
            }
        }
        try (DataOutputBuffer dob = new DataOutputBuffer()) {
            credentials.writeTokenStorageToStream(dob);
            ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
            amContainer.setTokens(fsTokens);
        }
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    /* log */ System.out.println("Submitting application to ASM");
    yarnClient.submitApplication(appContext);

    // Monitor the application
    boolean success = monitorApplication(appId);

    if (success && summary != null) {
        summary.stageIn();
    }

    return success;

}

From source file:gobblin.yarn.GobblinYarnAppLauncher.java

License:Apache License

/**
 * Setup and submit the Gobblin Yarn application.
 *
 * @throws IOException if there's anything wrong setting up and submitting the Yarn application
 * @throws YarnException if there's anything wrong setting up and submitting the Yarn application
 *///from  ww  w. j  av a  2 s.  com
@VisibleForTesting
ApplicationId setupAndSubmitApplication() throws IOException, YarnException {
    YarnClientApplication gobblinYarnApp = this.yarnClient.createApplication();
    ApplicationSubmissionContext appSubmissionContext = gobblinYarnApp.getApplicationSubmissionContext();
    appSubmissionContext.setApplicationType(GOBBLIN_YARN_APPLICATION_TYPE);
    ApplicationId applicationId = appSubmissionContext.getApplicationId();

    GetNewApplicationResponse newApplicationResponse = gobblinYarnApp.getNewApplicationResponse();
    // Set up resource type requirements for ApplicationMaster
    Resource resource = prepareContainerResource(newApplicationResponse);

    // Add lib jars, and jars and files that the ApplicationMaster need as LocalResources
    Map<String, LocalResource> appMasterLocalResources = addAppMasterLocalResources(applicationId);

    ContainerLaunchContext amContainerLaunchContext = Records.newRecord(ContainerLaunchContext.class);
    amContainerLaunchContext.setLocalResources(appMasterLocalResources);
    amContainerLaunchContext.setEnvironment(YarnHelixUtils.getEnvironmentVariables(this.yarnConfiguration));
    amContainerLaunchContext
            .setCommands(Lists.newArrayList(buildApplicationMasterCommand(resource.getMemory())));
    if (UserGroupInformation.isSecurityEnabled()) {
        setupSecurityTokens(amContainerLaunchContext);
    }

    // Setup the application submission context
    appSubmissionContext.setApplicationName(this.applicationName);
    appSubmissionContext.setResource(resource);
    appSubmissionContext.setQueue(this.appQueueName);
    appSubmissionContext.setPriority(Priority.newInstance(0));
    appSubmissionContext.setAMContainerSpec(amContainerLaunchContext);

    // Also setup container local resources by copying local jars and files the container need to HDFS
    addContainerLocalResources(applicationId);

    // Submit the application
    LOGGER.info("Submitting application " + applicationId);
    this.yarnClient.submitApplication(appSubmissionContext);

    LOGGER.info("Application successfully submitted and accepted");
    ApplicationReport applicationReport = this.yarnClient.getApplicationReport(applicationId);
    LOGGER.info("Application Name: " + applicationReport.getName());
    LOGGER.info("Application Tracking URL: " + applicationReport.getTrackingUrl());
    LOGGER.info("Application User: " + applicationReport.getUser() + " Queue: " + applicationReport.getQueue());

    return applicationId;
}

From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * This method should be called by the implementing application static main
 * method. It does all the work around creating a yarn application and
 * submitting the request to the yarn resource manager. The class given in
 * the appClass argument will be run inside the yarn-allocated master
 * container.//from  w w w.  j  av  a  2  s .  c om
 */
public static void submitApplicationMaster(Properties appConfig, Class<? extends YarnMaster> masterClass,
        String[] args, Boolean awaitCompletion) throws Exception {
    log.info("Yarn1 App Configuration:");
    for (Object param : appConfig.keySet()) {
        log.info(param.toString() + " = " + appConfig.get(param).toString());
    }
    String yarnConfigPath = appConfig.getProperty("yarn1.site", "/etc/hadoop");
    String masterClassName = masterClass.getName();
    appConfig.setProperty("yarn1.master.class", masterClassName);
    String applicationName = appConfig.getProperty("yarn1.application.name", masterClassName);
    log.info("--------------------------------------------------------------");

    if (Boolean.valueOf(appConfig.getProperty("yarn1.local.mode", "false"))) {
        YarnMaster.run(appConfig, args);
        return;
    }

    int masterPriority = Integer.valueOf(
            appConfig.getProperty("yarn1.master.priority", String.valueOf(YarnMaster.DEFAULT_MASTER_PRIORITY)));
    int masterMemoryMb = Integer.valueOf(appConfig.getProperty("yarn1.master.memory.mb",
            String.valueOf(YarnMaster.DEFAULT_MASTER_MEMORY_MB)));
    int masterNumCores = Integer.valueOf(
            appConfig.getProperty("yarn1.master.num.cores", String.valueOf(YarnMaster.DEFAULT_MASTER_CORES)));
    String queue = appConfig.getProperty("yarn1.queue");

    Configuration yarnConfig = new YarnConfiguration();
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/core-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/hdfs-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/yarn-site.xml"));
    for (Map.Entry<Object, Object> entry : appConfig.entrySet()) {
        yarnConfig.set(entry.getKey().toString(), entry.getValue().toString());
    }

    final org.apache.hadoop.yarn.client.api.YarnClient yarnClient = org.apache.hadoop.yarn.client.api.YarnClient
            .createYarnClient();
    yarnClient.init(yarnConfig);
    yarnClient.start();

    for (NodeReport report : yarnClient.getNodeReports(NodeState.RUNNING)) {
        log.debug("Node report:" + report.getNodeId() + " @ " + report.getHttpAddress() + " | "
                + report.getCapability());
    }

    log.info("Submitting application master class " + masterClassName);

    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    final ApplicationId appId = appResponse.getApplicationId();
    if (appId == null) {
        System.exit(111);
    } else {
        appConfig.setProperty("am.timestamp", String.valueOf(appId.getClusterTimestamp()));
        appConfig.setProperty("am.id", String.valueOf(appId.getId()));
    }

    YarnClient.distributeResources(yarnConfig, appConfig, applicationName);

    String masterJvmArgs = appConfig.getProperty("yarn1.master.jvm.args", "");
    YarnContainerContext masterContainer = new YarnContainerContext(yarnConfig, appConfig, masterJvmArgs,
            masterPriority, masterMemoryMb, masterNumCores, applicationName, YarnMaster.class, args);

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName(masterClassName);
    appContext.setResource(masterContainer.capability);
    appContext.setPriority(masterContainer.priority);
    appContext.setQueue(queue);
    appContext.setApplicationType(appConfig.getProperty("yarn1.application.type", "YARN"));
    appContext.setAMContainerSpec(masterContainer.createContainerLaunchContext());

    log.info("Master container spec: " + masterContainer.capability);

    yarnClient.submitApplication(appContext);

    ApplicationReport report = yarnClient.getApplicationReport(appId);
    log.info("Tracking URL: " + report.getTrackingUrl());

    if (awaitCompletion) {
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                if (!yarnClient.isInState(Service.STATE.STOPPED)) {
                    log.info("Killing yarn application in shutdown hook");
                    try {
                        yarnClient.killApplication(appId);
                    } catch (Throwable e) {
                        log.error("Failed to kill yarn application - please check YARN Resource Manager", e);
                    }
                }
            }
        });

        float lastProgress = -0.0f;
        while (true) {
            try {
                Thread.sleep(10000);
                report = yarnClient.getApplicationReport(appId);
                if (lastProgress != report.getProgress()) {
                    lastProgress = report.getProgress();
                    log.info(report.getApplicationId() + " " + (report.getProgress() * 100.00) + "% "
                            + (System.currentTimeMillis() - report.getStartTime()) + "(ms) "
                            + report.getDiagnostics());
                }
                if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.UNDEFINED)) {
                    log.info(report.getApplicationId() + " " + report.getFinalApplicationStatus());
                    log.info("Tracking url: " + report.getTrackingUrl());
                    log.info("Finish time: " + ((System.currentTimeMillis() - report.getStartTime()) / 1000)
                            + "(s)");
                    break;
                }
            } catch (Throwable e) {
                log.error("Master Heart Beat Error - terminating", e);
                yarnClient.killApplication(appId);
                Thread.sleep(2000);
            }
        }
        yarnClient.stop();

        if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.SUCCEEDED)) {
            System.exit(112);
        }
    }
    yarnClient.stop();
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.//  w  ww  . j a v  a2s.  co m
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();
    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    final YarnClient yarnClient = getYarnClient();

    // ------------------ Check if the specified queue exists --------------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        // check only if there are queues configured in yarn and for this session.
        if (queues.size() > 0 && this.yarnQueue != null) {
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    Map<String, String> jobSystemProperties = new HashMap<>(2);

    // Certificates are materialized locally so DFSClient can be set to null
    // LocalResources are not used by Flink, so set it null
    HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username,
            services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(),
            JobType.FLINK, null, null, jobSystemProperties, services.getSettings().getFlinkKafkaCertDir(),
            appResponse.getApplicationId().toString());

    StringBuilder tmpBuilder = new StringBuilder();
    for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) {
        String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue());
        javaOptions.add(option);
        addHopsworksParam(option);
        tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@");
    }

    dynamicPropertiesEncoded += tmpBuilder.toString();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    // ------------------ Set default file system scheme -------------------------
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the 
    // getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is "
                + "using the default Hadoop configuration values. The Flink YARN "
                + "client needs to store its files in a distributed file system");
    }

    // ------ Check if the YARN ClusterClient has the requested resources ---
    // the yarnMinAllocationMB specifies the smallest possible container 
    // allocation size. all allocations below this value are automatically 
    // set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible "
                + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'" + " is "
                + yarnMinAllocationMB + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will"
                + " account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the "
            + "'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager"
                + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: "
                + jobManagerMemoryMb + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, "
            + "but maybe not all TaskManagers are connecting from the beginning "
            + "because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink "
            + "YARN client needs to wait until the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace);

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        classPathBuilder.append(shipFile.getName());
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            classPathBuilder.append(File.separator).append("*");
        }
        classPathBuilder.append(File.pathSeparator);

        envShipFileList.append(remotePath).append(",");
    }
    ////////////////////////////////////////////////////////////////////////////
    /*
     * Add Hops LocalResources paths here
     *
     */
    //Add it to localResources
    for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) {
        localResources.put(entry.getKey(), entry.getValue());
        //Append name to classPathBuilder
        classPathBuilder.append(entry.getKey());
        classPathBuilder.append(File.pathSeparator);
    }

    ////////////////////////////////////////////////////////////////////////////
    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    // setup security tokens
    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this "
                    + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info("Deployment took more than 60 seconds. Please check if the "
                        + "requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }

    String host = report.getHost();
    int port = report.getRpcPort();

    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

    // the Flink cluster is deployed in YARN. Represent cluster
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}

From source file:io.hops.metadata.util.TestHopYarnAPIUtilities.java

License:Apache License

private SubmitApplicationRequest mockSubmitAppRequest(ApplicationId appId, String name, String queue,
        Set<String> tags, boolean unmanaged) {

    //    ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class);
    ContainerLaunchContext amContainerSpec = new ContainerLaunchContextPBImpl();
    org.apache.hadoop.yarn.api.records.Resource resource = Resources
            .createResource(YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);

    ApplicationSubmissionContext submissionContext = recordFactory
            .newRecordInstance(ApplicationSubmissionContext.class);
    submissionContext.setAMContainerSpec(amContainerSpec);
    submissionContext.setApplicationName(name);
    submissionContext.setQueue(queue);/*from  w w  w  .  j av a 2s.co m*/
    submissionContext.setApplicationId(appId);
    submissionContext.setResource(resource);
    submissionContext.setApplicationType(appType);
    submissionContext.setApplicationTags(tags);
    submissionContext.setUnmanagedAM(unmanaged);

    SubmitApplicationRequest submitRequest = recordFactory.newRecordInstance(SubmitApplicationRequest.class);
    submitRequest.setApplicationSubmissionContext(submissionContext);
    return submitRequest;
}

From source file:io.hops.tensorflow.Client.java

License:Apache License

private ApplicationSubmissionContext createApplicationSubmissionContext(YarnClientApplication app,
        ContainerLaunchContext containerContext) {

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();

    if (name == null) {
        appContext.setApplicationName(mainRelativePath);
    } else {//  w ww.java  2s .c o m
        appContext.setApplicationName(name);
    }
    appContext.setApplicationType("YARNTF");

    appContext.setMaxAppAttempts(maxAppAttempts);
    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);

    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }

    appContext.setResource(Resource.newInstance(amMemory, amVCores));
    appContext.setAMContainerSpec(containerContext);
    appContext.setPriority(Priority.newInstance(amPriority));
    appContext.setQueue(amQueue); // the queue to which this application is to be submitted in the RM

    return appContext;
}

From source file:org.apache.flink.yarn.AbstractYarnClusterDescriptor.java

License:Apache License

public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient,
        YarnClientApplication yarnApplication) throws Exception {

    // ------------------ Set default file system scheme -------------------------

    try {/*from   w w  w  . j a  v  a 2 s . c o m*/
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    // add the user jar to the classpath of the to-be-created cluster
    if (userJarFiles != null) {
        effectiveShipFiles.addAll(userJarFiles);
    }

    // Set-up ApplicationSubmissionContext for the application

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);

    if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        if (shipFile.isDirectory()) {
            // add directories to the classpath
            java.nio.file.Path shipPath = shipFile.toPath();
            final java.nio.file.Path parentPath = shipPath.getParent();

            Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() {
                @Override
                public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs)
                        throws IOException {
                    super.preVisitDirectory(dir, attrs);

                    java.nio.file.Path relativePath = parentPath.relativize(dir);

                    classPathBuilder.append(relativePath).append(File.separator).append("*")
                            .append(File.pathSeparator);

                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            // add files to the classpath
            classPathBuilder.append(shipFile.getName()).append(File.pathSeparator);
        }

        envShipFileList.append(remotePath).append(",");
    }

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    // write job graph to tmp file and add it to local resource
    // TODO: server use user main method to generate job graph
    if (jobGraph != null) {
        try {
            File fp = File.createTempFile(appId.toString(), null);
            fp.deleteOnExit();
            try (FileOutputStream output = new FileOutputStream(fp);
                    ObjectOutputStream obOutput = new ObjectOutputStream(output);) {
                obOutput.writeObject(jobGraph);
            }
            LocalResource jobgraph = Records.newRecord(LocalResource.class);
            Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph,
                    fs.getHomeDirectory());
            localResources.put("job.graph", jobgraph);
            paths.add(remoteJobGraph);
            classPathBuilder.append("job.graph").append(File.pathSeparator);
        } catch (Exception e) {
            LOG.warn("Add job graph to local resource fail");
            throw e;
        }
    }

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    //To support Yarn Secure Integration Test Scenario
    //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
    //and KRB5 configuration files. We are adding these files as container local resources for the container
    //applications (JM/TMs) to have proper secure cluster setup
    Path remoteKrb5Path = null;
    Path remoteYarnSiteXmlPath = null;
    boolean hasKrb5 = false;
    if (System.getenv("IN_TESTS") != null) {
        String krb5Config = System.getProperty("java.security.krb5.conf");
        if (krb5Config != null && krb5Config.length() != 0) {
            File krb5 = new File(krb5Config);
            LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket",
                    krb5.getAbsolutePath());
            LocalResource krb5ConfResource = Records.newRecord(LocalResource.class);
            Path krb5ConfPath = new Path(krb5.getAbsolutePath());
            remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource,
                    fs.getHomeDirectory());
            localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);

            File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME);
            LOG.info("Adding Yarn configuration {} to the AM container local resource bucket",
                    f.getAbsolutePath());
            LocalResource yarnConfResource = Records.newRecord(LocalResource.class);
            Path yarnSitePath = new Path(f.getAbsolutePath());
            remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath,
                    yarnConfResource, fs.getHomeDirectory());
            localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);

            hasKrb5 = true;
        }
    }

    // setup security tokens
    LocalResource keytabResource = null;
    Path remotePathKeytab = null;
    String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
    if (keytab != null) {
        LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
        keytabResource = Records.newRecord(LocalResource.class);
        Path keytabPath = new Path(keytab);
        remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource,
                fs.getHomeDirectory());
        localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
    }

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5);

    if (UserGroupInformation.isSecurityEnabled() && keytab == null) {
        //set tokens only when keytab is not provided
        LOG.info("Adding delegation token to the AM container..");
        Utils.setTokensFor(amContainer, paths, conf);
    }

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
    appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    if (keytabResource != null) {
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString());
        String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
    }

    //To support Yarn Secure Integration Test Scenario
    if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
        appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());
    }

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name);
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    setApplicationTags(appContext);

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster.", e);
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        LOG.debug("Application State: {}", appState);
        switch (appState) {
        case FAILED:
        case FINISHED: //TODO: the finished state may be valid in flip-6
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }
    return report;
}

From source file:org.apache.flink.yarn.FlinkYarnClient.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.// w  w w  .  jav a  2  s. c o  m
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    isReadyForDepoyment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------

    List<Tuple2<String, String>> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Tuple2<String, String> dynProperty : dynProperties) {
        flinkConfiguration.setString(dynProperty.f0, dynProperty.f1);
    }

    // ------------------ Check if the specified queue exists --------------

    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN Cluster has the requested resources --------------

    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB
                + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are "
            + "connecting from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container  ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;

    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx"
            + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-main.log\"";
    }

    if (hasLogback) {
        amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    }
    if (hasLog4j) {
        amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
    }

    amCommand += " " + ApplicationMaster.class.getName() + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // intialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));

    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), flinkConfigurationPath,
            flinkConf, fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    Utils.setupEnv(conf, appMasterEnv);
    // set configuration values
    appMasterEnv.put(FlinkYarnClient.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(FlinkYarnClient.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(FlinkYarnClient.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_APP_ID, appId.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(FlinkYarnClient.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(FlinkYarnClient.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(FlinkYarnClient.ENV_STREAMING_MODE, String.valueOf(streamingMode));

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(FlinkYarnClient.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report = yarnClient.getApplicationReport(appId);
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further invesitage the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            LOG.info("Deploying cluster, current state " + appState);
            if (waittime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        waittime += 1000;
        Thread.sleep(1000);
    }
    // the Flink cluster is deployed in YARN. Represent cluster
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}