Example usage for org.apache.hadoop.yarn.api.records NodeState RUNNING

List of usage examples for org.apache.hadoop.yarn.api.records NodeState RUNNING

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records NodeState RUNNING.

Prototype

NodeState RUNNING

To view the source code for org.apache.hadoop.yarn.api.records NodeState RUNNING.

Click Source Link

Document

Running node

Usage

From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * This method should be called by the implementing application static main
 * method. It does all the work around creating a yarn application and
 * submitting the request to the yarn resource manager. The class given in
 * the appClass argument will be run inside the yarn-allocated master
 * container.//w ww.j  a v  a2 s . co m
 */
public static void submitApplicationMaster(Properties appConfig, Class<? extends YarnMaster> masterClass,
        String[] args, Boolean awaitCompletion) throws Exception {
    log.info("Yarn1 App Configuration:");
    for (Object param : appConfig.keySet()) {
        log.info(param.toString() + " = " + appConfig.get(param).toString());
    }
    String yarnConfigPath = appConfig.getProperty("yarn1.site", "/etc/hadoop");
    String masterClassName = masterClass.getName();
    appConfig.setProperty("yarn1.master.class", masterClassName);
    String applicationName = appConfig.getProperty("yarn1.application.name", masterClassName);
    log.info("--------------------------------------------------------------");

    if (Boolean.valueOf(appConfig.getProperty("yarn1.local.mode", "false"))) {
        YarnMaster.run(appConfig, args);
        return;
    }

    int masterPriority = Integer.valueOf(
            appConfig.getProperty("yarn1.master.priority", String.valueOf(YarnMaster.DEFAULT_MASTER_PRIORITY)));
    int masterMemoryMb = Integer.valueOf(appConfig.getProperty("yarn1.master.memory.mb",
            String.valueOf(YarnMaster.DEFAULT_MASTER_MEMORY_MB)));
    int masterNumCores = Integer.valueOf(
            appConfig.getProperty("yarn1.master.num.cores", String.valueOf(YarnMaster.DEFAULT_MASTER_CORES)));
    String queue = appConfig.getProperty("yarn1.queue");

    Configuration yarnConfig = new YarnConfiguration();
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/core-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/hdfs-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/yarn-site.xml"));
    for (Map.Entry<Object, Object> entry : appConfig.entrySet()) {
        yarnConfig.set(entry.getKey().toString(), entry.getValue().toString());
    }

    final org.apache.hadoop.yarn.client.api.YarnClient yarnClient = org.apache.hadoop.yarn.client.api.YarnClient
            .createYarnClient();
    yarnClient.init(yarnConfig);
    yarnClient.start();

    for (NodeReport report : yarnClient.getNodeReports(NodeState.RUNNING)) {
        log.debug("Node report:" + report.getNodeId() + " @ " + report.getHttpAddress() + " | "
                + report.getCapability());
    }

    log.info("Submitting application master class " + masterClassName);

    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    final ApplicationId appId = appResponse.getApplicationId();
    if (appId == null) {
        System.exit(111);
    } else {
        appConfig.setProperty("am.timestamp", String.valueOf(appId.getClusterTimestamp()));
        appConfig.setProperty("am.id", String.valueOf(appId.getId()));
    }

    YarnClient.distributeResources(yarnConfig, appConfig, applicationName);

    String masterJvmArgs = appConfig.getProperty("yarn1.master.jvm.args", "");
    YarnContainerContext masterContainer = new YarnContainerContext(yarnConfig, appConfig, masterJvmArgs,
            masterPriority, masterMemoryMb, masterNumCores, applicationName, YarnMaster.class, args);

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName(masterClassName);
    appContext.setResource(masterContainer.capability);
    appContext.setPriority(masterContainer.priority);
    appContext.setQueue(queue);
    appContext.setApplicationType(appConfig.getProperty("yarn1.application.type", "YARN"));
    appContext.setAMContainerSpec(masterContainer.createContainerLaunchContext());

    log.info("Master container spec: " + masterContainer.capability);

    yarnClient.submitApplication(appContext);

    ApplicationReport report = yarnClient.getApplicationReport(appId);
    log.info("Tracking URL: " + report.getTrackingUrl());

    if (awaitCompletion) {
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                if (!yarnClient.isInState(Service.STATE.STOPPED)) {
                    log.info("Killing yarn application in shutdown hook");
                    try {
                        yarnClient.killApplication(appId);
                    } catch (Throwable e) {
                        log.error("Failed to kill yarn application - please check YARN Resource Manager", e);
                    }
                }
            }
        });

        float lastProgress = -0.0f;
        while (true) {
            try {
                Thread.sleep(10000);
                report = yarnClient.getApplicationReport(appId);
                if (lastProgress != report.getProgress()) {
                    lastProgress = report.getProgress();
                    log.info(report.getApplicationId() + " " + (report.getProgress() * 100.00) + "% "
                            + (System.currentTimeMillis() - report.getStartTime()) + "(ms) "
                            + report.getDiagnostics());
                }
                if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.UNDEFINED)) {
                    log.info(report.getApplicationId() + " " + report.getFinalApplicationStatus());
                    log.info("Tracking url: " + report.getTrackingUrl());
                    log.info("Finish time: " + ((System.currentTimeMillis() - report.getStartTime()) / 1000)
                            + "(s)");
                    break;
                }
            } catch (Throwable e) {
                log.error("Master Heart Beat Error - terminating", e);
                yarnClient.killApplication(appId);
                Thread.sleep(2000);
            }
        }
        yarnClient.stop();

        if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.SUCCEEDED)) {
            System.exit(112);
        }
    }
    yarnClient.stop();
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
        throws YarnException, IOException {
    List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);

    int totalFreeMemory = 0;
    int containerLimit = 0;
    int[] nodeManagersFree = new int[nodes.size()];

    for (int i = 0; i < nodes.size(); i++) {
        NodeReport rep = nodes.get(i);//from  www  . ja  va  2s .  com
        int free = rep.getCapability().getMemory() - (rep.getUsed() != null ? rep.getUsed().getMemory() : 0);
        nodeManagersFree[i] = free;
        totalFreeMemory += free;
        if (free > containerLimit) {
            containerLimit = free;
        }
    }
    return new ClusterResourceDescription(totalFreeMemory, containerLimit, nodeManagersFree);
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

@Override
public String getClusterDescription() {

    try {// w ww.  ja v  a2 s .  c  om
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);

        YarnClient yarnClient = getYarnClient();
        YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();

        ps.append("NodeManagers in the ClusterClient " + metrics.getNumNodeManagers());
        List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
        final String format = "|%-16s |%-16s %n";
        ps.printf("|Property         |Value          %n");
        ps.println("+---------------------------------------+");
        int totalMemory = 0;
        int totalCores = 0;
        for (NodeReport rep : nodes) {
            final Resource res = rep.getCapability();
            totalMemory += res.getMemory();
            totalCores += res.getVirtualCores();
            ps.format(format, "NodeID", rep.getNodeId());
            ps.format(format, "Memory", res.getMemory() + " MB");
            ps.format(format, "vCores", res.getVirtualCores());
            ps.format(format, "HealthReport", rep.getHealthReport());
            ps.format(format, "Containers", rep.getNumContainers());
            ps.println("+---------------------------------------+");
        }
        ps.println("Summary: totalMemory " + totalMemory + " totalCores " + totalCores);
        List<QueueInfo> qInfo = yarnClient.getAllQueues();
        for (QueueInfo q : qInfo) {
            ps.println("Queue: " + q.getQueueName() + ", Current Capacity: " + q.getCurrentCapacity()
                    + " Max Capacity: " + q.getMaximumCapacity() + " Applications: "
                    + q.getApplications().size());
        }
        yarnClient.stop();
        return baos.toString();
    } catch (Exception e) {
        throw new RuntimeException("Couldn't get cluster description", e);
    }
}

From source file:io.hops.tensorflow.Client.java

License:Apache License

private void logClusterState() throws IOException, YarnException {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info(//  www  .  j  a v  a2  s.co  m
            "Got Cluster metric info from ASM" + "\n\t numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + "\n\t nodeId=" + node.getNodeId() + "\n\t nodeAddress"
                + node.getHttpAddress() + "\n\t nodeRackName" + node.getRackName() + "\n\t nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + "\n\t queueName=" + queueInfo.getQueueName() + "\n\t queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + "\n\t queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + "\n\t queueApplicationCount=" + queueInfo.getApplications().size() + "\n\t queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + "\n\t queueName=" + aclInfo.getQueueName() + "\n\t userAcl="
                    + userAcl.name());
        }
    }
}

From source file:me.haosdent.noya.Client.java

License:Apache License

/**
 * Main run function for the client// w ww.  j av  a  2s  . co  m
 *
 * @return true if application completed successfully
 *
 * @throws java.io.IOException
 * @throws org.apache.hadoop.yarn.exceptions.YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    //appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }
    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            ApplicationConstants.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(ApplicationConstants.Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final commmand
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    //commands.add(command.toString());
    commands.add("echo 'hello' >/tmp/yarn_test");
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    capability.setVirtualCores(amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:org.apache.drill.yarn.appMaster.AMYarnFacadeImpl.java

License:Apache License

@Override
public List<NodeReport> getNodeReports() throws YarnFacadeException {
    try {/*from   w  ww. j av  a  2  s.c o  m*/
        return client.getNodeReports(NodeState.RUNNING);
    } catch (Exception e) {
        throw new YarnFacadeException("getNodeReports failed", e);
    }
}

From source file:org.apache.flink.yarn.FlinkYarnClient.java

License:Apache License

public String getClusterDescription() throws Exception {

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);

    YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();

    ps.append("NodeManagers in the Cluster " + metrics.getNumNodeManagers());
    List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
    final String format = "|%-16s |%-16s %n";
    ps.printf("|Property         |Value          %n");
    ps.println("+---------------------------------------+");
    int totalMemory = 0;
    int totalCores = 0;
    for (NodeReport rep : nodes) {
        final Resource res = rep.getCapability();
        totalMemory += res.getMemory();/*www. jav a2 s.c  o  m*/
        totalCores += res.getVirtualCores();
        ps.format(format, "NodeID", rep.getNodeId());
        ps.format(format, "Memory", res.getMemory() + " MB");
        ps.format(format, "vCores", res.getVirtualCores());
        ps.format(format, "HealthReport", rep.getHealthReport());
        ps.format(format, "Containers", rep.getNumContainers());
        ps.println("+---------------------------------------+");
    }
    ps.println("Summary: totalMemory " + totalMemory + " totalCores " + totalCores);
    List<QueueInfo> qInfo = yarnClient.getAllQueues();
    for (QueueInfo q : qInfo) {
        ps.println("Queue: " + q.getQueueName() + ", Current Capacity: " + q.getCurrentCapacity()
                + " Max Capacity: " + q.getMaximumCapacity() + " Applications: " + q.getApplications().size());
    }
    yarnClient.stop();
    return baos.toString();
}

From source file:org.apache.flink.yarn.FlinkYarnClientBase.java

License:Apache License

@Override
public String getClusterDescription() throws Exception {

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);

    YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();

    ps.append("NodeManagers in the Cluster " + metrics.getNumNodeManagers());
    List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
    final String format = "|%-16s |%-16s %n";
    ps.printf("|Property         |Value          %n");
    ps.println("+---------------------------------------+");
    int totalMemory = 0;
    int totalCores = 0;
    for (NodeReport rep : nodes) {
        final Resource res = rep.getCapability();
        totalMemory += res.getMemory();//from  ww w.  j a va2  s  .co  m
        totalCores += res.getVirtualCores();
        ps.format(format, "NodeID", rep.getNodeId());
        ps.format(format, "Memory", res.getMemory() + " MB");
        ps.format(format, "vCores", res.getVirtualCores());
        ps.format(format, "HealthReport", rep.getHealthReport());
        ps.format(format, "Containers", rep.getNumContainers());
        ps.println("+---------------------------------------+");
    }
    ps.println("Summary: totalMemory " + totalMemory + " totalCores " + totalCores);
    List<QueueInfo> qInfo = yarnClient.getAllQueues();
    for (QueueInfo q : qInfo) {
        ps.println("Queue: " + q.getQueueName() + ", Current Capacity: " + q.getCurrentCapacity()
                + " Max Capacity: " + q.getMaximumCapacity() + " Applications: " + q.getApplications().size());
    }
    yarnClient.stop();
    return baos.toString();
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *///from  w w  w .  j  ava2  s.  c om
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
                    "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy",
                    "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------

    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));

    Assert.assertTrue("Expect to see failed container",
            eC.contains("Container killed by the ApplicationMaster"));

    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}

From source file:org.apache.giraph.yarn.GiraphYarnClient.java

License:Apache License

/**
 * Utility to make sure we have the cluster resources we need to run this
 * job. If they are not available, we should die here before too much setup.
 * @param cluster the GetNewApplicationResponse from the YARN RM.
 *///from   w  w w. j  a  va 2s. c o m
private void checkPerNodeResourcesAvailable(final GetNewApplicationResponse cluster)
        throws YarnException, IOException {
    // are there enough containers to go around for our Giraph job?
    List<NodeReport> nodes = null;
    long totalAvailable = 0;
    try {
        nodes = yarnClient.getNodeReports(NodeState.RUNNING);
    } catch (YarnException yre) {
        throw new RuntimeException("GiraphYarnClient could not connect with "
                + "the YARN ResourceManager to determine the number of available " + "application containers.",
                yre);
    }
    for (NodeReport node : nodes) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress "
                + node.getHttpAddress() + ", nodeRackName " + node.getRackName() + ", nodeNumContainers "
                + node.getNumContainers());
        totalAvailable += node.getCapability().getMemory();
    }
    // 1 master + all workers in -w command line arg
    final int workers = giraphConf.getMaxWorkers() + 1;
    checkAndAdjustPerTaskHeapSize(cluster);
    final long totalAsk = giraphConf.getYarnTaskHeapMb() * workers;
    if (totalAsk > totalAvailable) {
        throw new IllegalStateException("Giraph's estimated cluster heap " + totalAsk
                + "MB ask is greater than the current available cluster " + "heap of " + totalAvailable
                + "MB. Aborting Job.");
    }
}