Example usage for org.apache.hadoop.yarn.client.api YarnClient getNodeReports

List of usage examples for org.apache.hadoop.yarn.client.api YarnClient getNodeReports

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.client.api.YarnClient.getNodeReports.

Prototype

public abstract List<NodeReport> getNodeReports(NodeState... states) throws YarnException, IOException;

Source Link

Document

Get a report of nodes (NodeReport) in the cluster.

Usage

From source file:alluxio.yarn.YarnUtils.java

License:Apache License

/**
 * Returns the host names for all nodes in yarnClient's YARN cluster.
 *
 * @param yarnClient the client to use to look up node information
 * @return the set of host names/*from   w ww.  j  av a  2s . c o  m*/
 * @throws YarnException if an error occurs within YARN
 * @throws IOException if an error occurs in YARN's underlying IO
 */
public static Set<String> getNodeHosts(YarnClient yarnClient) throws YarnException, IOException {
    ImmutableSet.Builder<String> nodeHosts = ImmutableSet.builder();
    for (NodeReport runningNode : yarnClient.getNodeReports(USABLE_NODE_STATES)) {
        nodeHosts.add(runningNode.getNodeId().getHost());
    }
    return nodeHosts.build();
}

From source file:cn.edu.buaa.act.petuumOnYarn.ApplicationMaster.java

License:Apache License

/**
 * Queries YARN for the running nodes and records those whose capability can host a container.
 *
 * <p>A node qualifies when its memory is at least {@code containerMemory} and its virtual core
 * count is at least {@code containerVirtualCores}. Qualifying nodes are stored in
 * {@code avaliableNodeList}; on success {@code numTotalContainers} is set to {@code numNodes}.
 *
 * @return {@code true} if at least {@code numNodes} qualifying nodes were found; {@code false}
 *         if the cluster has too few running nodes, too few qualifying nodes, or the lookup
 *         failed
 */
private boolean getAvaliableNodes() {
    YarnClient yarnClient = null;
    try {
        yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();

        List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        for (NodeReport node : clusterNodeReports) {
            LOG.info("node infos:" + node.getHttpAddress());
        }

        avaliableNodeList = new ArrayList<NodeReport>();
        if (numNodes > clusterNodeReports.size()) {
            LOG.error("cluster nodes isn't enough");
            return false;
        }

        for (NodeReport node : clusterNodeReports) {
            if (node.getCapability().getMemory() >= containerMemory
                    && node.getCapability().getVirtualCores() >= containerVirtualCores) {
                avaliableNodeList.add(node);
            }
        }

        if (avaliableNodeList.size() < numNodes) {
            LOG.error("Resource isn't enough");
            return false;
        }
        numTotalContainers = numNodes;
        return true;
    } catch (Exception e) {
        // Pass the exception itself so the logger prints the real stack trace; logging
        // e.getStackTrace() would only print the array's identity string.
        LOG.error(e.getMessage(), e);
        return false;
    } finally {
        // The client is only needed for this one lookup; stop it on every path so its
        // connection to the ResourceManager is not leaked.
        if (yarnClient != null) {
            yarnClient.stop();
        }
    }
}

From source file:com.cloudera.llama.am.MiniLlama.java

License:Apache License

/**
 * Collects the ids of the YARN nodes to use.
 *
 * <p>When a mini YARN cluster is present ({@code miniYarn != null}) the ids are read directly
 * from its node managers; the number of node managers is taken from
 * {@code MINI_CLUSTER_NODES_KEY} (default 1). Otherwise a temporary {@link YarnClient} is
 * started with {@code conf} and the ids of all {@code RUNNING} nodes are returned.
 *
 * @param conf configuration used to initialise the temporary YARN client
 * @return the node ids, in the order they were reported
 * @throws Exception if querying YARN fails
 */
private List<NodeId> getYarnNodeIds(Configuration conf) throws Exception {
    List<NodeId> list = new ArrayList<NodeId>();
    if (miniYarn != null) {
        int clusterNodes = getConf().getInt(MINI_CLUSTER_NODES_KEY, 1);
        for (int i = 0; i < clusterNodes; i++) {
            list.add(miniYarn.getNodeManager(i).getNMContext().getNodeId());
        }
    } else {
        YarnClient yarnClient = YarnClient.createYarnClient();
        try {
            yarnClient.init(conf);
            yarnClient.start();
            for (NodeReport node : yarnClient.getNodeReports(NodeState.RUNNING)) {
                list.add(node.getNodeId());
            }
        } finally {
            // Stop the client even when init/start/getNodeReports throws, so it is not leaked.
            yarnClient.stop();
        }
    }
    return list;
}

From source file:com.flyhz.avengers.framework.application.InitEnvApplication.java

License:Apache License

/**
 * Requests one container per distinct host of the running YARN nodes.
 *
 * <p>The hosts of all {@code RUNNING} nodes are added to {@code nodeSet}; one container request
 * is then submitted to {@code amRMClient} for each entry, and {@code numTotalContainers} /
 * {@code numRequestedContainers} are updated to that count. YARN and I/O failures are logged
 * and not rethrown.
 */
private void initJar() {
    LOG.info("initJar");
    numTotalContainers = 1;
    containerMemory = 10;
    // NOTE(review): with the assignment to 1 just above, this guard cannot fire unless another
    // thread writes the field in between — confirm whether it is still needed.
    if (numTotalContainers == 0) {
        throw new IllegalArgumentException("Cannot run distributed shell with no containers");
    }
    requestPriority = 0;
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    List<NodeReport> clusterNodeReports;
    try {
        clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        LOG.info("Got all node!");
        for (NodeReport node : clusterNodeReports) {
            nodeSet.add(node.getNodeId().getHost());
        }

        LOG.info("initJar Completed setting up app master command {}", initJarCmd);
        // One container per distinct host.
        numTotalContainers = nodeSet.size();
        for (int i = 0; i < numTotalContainers; i++) {
            ContainerRequest containerAsk = setupContainerAskForRM();
            amRMClient.addContainerRequest(containerAsk);
        }

        numRequestedContainers.set(numTotalContainers);

    } catch (YarnException e) {
        LOG.error("initJarAndClasspath", e);
    } catch (IOException e) {
        LOG.error("initJarAndClasspath", e);
    } finally {
        try {
            yarnClient.close();
        } catch (IOException e) {
            // Closing is best-effort, but record the failure instead of hiding it.
            LOG.warn("Failed to close YarnClient", e);
        }
    }
}

From source file:com.flyhz.avengers.framework.AvengersAppMaster.java

License:Apache License

/**
 * Builds the shell command that copies the application jar to each node and requests one
 * container per distinct host to run it, then waits for those containers to complete.
 *
 * <p>The hosts of all {@code RUNNING} nodes are added to {@code nodeSet}. The generated command
 * (stored in {@code initJarCmd}) creates a per-application directory under
 * {@code hadoop.tmp.dir} and copies {@code AvengersAppMaster.jar} from the user's HDFS home
 * directory into it, unless the jar is already present. The method then polls every 200 ms
 * until {@code done} is set or all requested containers have completed. YARN and I/O failures
 * are logged and not rethrown.
 */
private void initJar() {
    LOG.info("initJar");
    initCommon();
    this.currentProcess = "initJar";
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    List<NodeReport> clusterNodeReports;
    try {
        clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        LOG.info("Got all node!");
        for (NodeReport node : clusterNodeReports) {
            LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                    + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                    + node.getNumContainers());
            nodeSet.add(node.getNodeId().getHost());
        }
        Vector<CharSequence> vargs = new Vector<CharSequence>(30);
        String appTempDir = conf.get("hadoop.tmp.dir");
        FileSystem fs = DistributedFileSystem.get(conf);
        String hdfsJar = fs.getHomeDirectory() + "/avengers/" + this.appAttemptID.getApplicationId().getId()
                + "/AvengersAppMaster.jar";
        // Shell fragment: only fetch the jar when it is not already in the local temp dir.
        vargs.add("if [ ! -e " + appTempDir + "/" + this.appAttemptID.getApplicationId().getId()
                + "/AvengersAppMaster.jar" + " ];");
        vargs.add("then");
        vargs.add("/bin/mkdir -p");
        vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";");
        vargs.add(Environment.HADOOP_YARN_HOME.$() + "/bin/hadoop");
        vargs.add("fs");
        vargs.add("-copyToLocal");
        vargs.add(hdfsJar);
        vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";");
        vargs.add("fi");
        // Add log redirect params
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stderr");

        // Get final command
        StringBuilder command = new StringBuilder();
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        initJarCmd = command.toString();
        LOG.info("initJar Completed setting up app master command {}", initJarCmd);
        // One container per distinct host.
        numTotalContainers = nodeSet.size();
        for (int i = 0; i < numTotalContainers; i++) {
            ContainerRequest containerAsk = setupContainerAskForRM();
            amRMClient.addContainerRequest(containerAsk);
        }

        numRequestedContainers.set(numTotalContainers);

        // Keep waiting through interrupts as before, but remember that one happened and
        // restore the thread's interrupt status afterwards so callers can still observe it.
        boolean interrupted = false;
        try {
            while (!done && (numCompletedContainers.get() != numTotalContainers)) {
                try {
                    Thread.sleep(200);
                } catch (InterruptedException ex) {
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    } catch (YarnException e) {
        LOG.error("initJarAndClasspath", e);
    } catch (IOException e) {
        LOG.error("initJarAndClasspath", e);
    } finally {
        try {
            yarnClient.close();
        } catch (IOException e) {
            // Closing is best-effort, but record the failure instead of hiding it.
            LOG.warn("Failed to close YarnClient", e);
        }
    }
}

From source file:eu.stratosphere.yarn.Client.java

License:Apache License

/**
 * Summarises the unused memory across all running nodes of the cluster.
 *
 * <p>For every {@code RUNNING} node the free memory is its capability minus its used memory
 * (a node without a usage report counts as using nothing). The result carries the sum of the
 * free memory over all nodes and the largest free amount found on a single node.
 *
 * @param yarnClient client used to fetch the node reports
 * @return description holding {@code totalFreeMemory} and {@code containerLimit}
 * @throws YarnException if YARN reports an error
 * @throws IOException if communication with YARN fails
 */
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
        throws YarnException, IOException {
    ClusterResourceDescription description = new ClusterResourceDescription();
    description.totalFreeMemory = 0;
    description.containerLimit = 0;

    for (NodeReport report : yarnClient.getNodeReports(NodeState.RUNNING)) {
        int capacity = report.getCapability().getMemory();
        int inUse = 0;
        if (report.getUsed() != null) {
            inUse = report.getUsed().getMemory();
        }
        int available = capacity - inUse;

        description.totalFreeMemory += available;
        // Track the largest single-node free amount.
        if (description.containerLimit < available) {
            description.containerLimit = available;
        }
    }
    return description;
}

From source file:eu.stratosphere.yarn.Client.java

License:Apache License

/**
 * Prints an overview of the cluster to standard output and terminates the JVM.
 *
 * <p>The output lists the number of node managers, a table with id, memory, vcores, health
 * report and container count for every {@code RUNNING} node, a memory/vcore summary and one
 * line per queue. Afterwards the client is stopped and {@code System.exit(0)} is called, so
 * this method does not return normally.
 *
 * @param yarnClient client used to query the cluster; stopped before exiting
 * @throws YarnException if YARN reports an error
 * @throws IOException if communication with YARN fails
 */
private void showClusterMetrics(YarnClient yarnClient) throws YarnException, IOException {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    System.out.println("NodeManagers in the Cluster " + clusterMetrics.getNumNodeManagers());

    List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);
    final String rowFormat = "|%-16s |%-16s %n";
    final String separator = "+---------------------------------------+";

    System.out.printf("|Property         |Value          %n");
    System.out.println(separator);

    int memorySum = 0;
    int coreSum = 0;
    for (NodeReport report : runningNodes) {
        final Resource capability = report.getCapability();
        memorySum += capability.getMemory();
        coreSum += capability.getVirtualCores();

        // One table row per property of this node.
        System.out.printf(rowFormat, "NodeID", report.getNodeId());
        System.out.printf(rowFormat, "Memory", capability.getMemory() + " MB");
        System.out.printf(rowFormat, "vCores", capability.getVirtualCores());
        System.out.printf(rowFormat, "HealthReport", report.getHealthReport());
        System.out.printf(rowFormat, "Containers", report.getNumContainers());
        System.out.println(separator);
    }
    System.out.println("Summary: totalMemory " + memorySum + " totalCores " + coreSum);

    for (QueueInfo queue : yarnClient.getAllQueues()) {
        String queueLine = "Queue: " + queue.getQueueName() + ", Current Capacity: "
                + queue.getCurrentCapacity() + " Max Capacity: " + queue.getMaximumCapacity()
                + " Applications: " + queue.getApplications().size();
        System.out.println(queueLine);
    }

    yarnClient.stop();
    System.exit(0);
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * Summarises the unused memory across all running nodes of the cluster.
 *
 * <p>For every {@code RUNNING} node the free memory is its capability minus its used memory
 * (a node without a usage report counts as using nothing).
 *
 * @param yarnClient client used to fetch the node reports
 * @return description built from the total free memory, the largest free amount on a single
 *         node, and the per-node free amounts in report order
 * @throws YarnException if YARN reports an error
 * @throws IOException if communication with YARN fails
 */
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
        throws YarnException, IOException {
    List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);

    int[] freePerNode = new int[runningNodes.size()];
    int freeSum = 0;
    int largestFree = 0;

    int slot = 0;
    for (NodeReport report : runningNodes) {
        int capacity = report.getCapability().getMemory();
        int inUse = 0;
        if (report.getUsed() != null) {
            inUse = report.getUsed().getMemory();
        }
        int available = capacity - inUse;

        freePerNode[slot++] = available;
        freeSum += available;
        // Track the largest single-node free amount.
        if (largestFree < available) {
            largestFree = available;
        }
    }
    return new ClusterResourceDescription(freeSum, largestFree, freePerNode);
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * Renders an overview of the cluster as text.
 *
 * <p>The text lists the number of node managers, a table with id, memory, vcores, health
 * report and container count for every {@code RUNNING} node, a memory/vcore summary and one
 * line per queue. The client obtained from {@code getYarnClient()} is stopped before this
 * method returns, whether or not the lookup succeeded.
 *
 * @return the formatted cluster description
 * @throws RuntimeException if any part of the lookup fails; the original exception is the cause
 */
@Override
public String getClusterDescription() {
    YarnClient yarnClient = null;
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);

        yarnClient = getYarnClient();
        YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();

        ps.append("NodeManagers in the ClusterClient " + metrics.getNumNodeManagers());
        List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
        final String format = "|%-16s |%-16s %n";
        ps.printf("|Property         |Value          %n");
        ps.println("+---------------------------------------+");
        int totalMemory = 0;
        int totalCores = 0;
        for (NodeReport rep : nodes) {
            final Resource res = rep.getCapability();
            totalMemory += res.getMemory();
            totalCores += res.getVirtualCores();
            ps.format(format, "NodeID", rep.getNodeId());
            ps.format(format, "Memory", res.getMemory() + " MB");
            ps.format(format, "vCores", res.getVirtualCores());
            ps.format(format, "HealthReport", rep.getHealthReport());
            ps.format(format, "Containers", rep.getNumContainers());
            ps.println("+---------------------------------------+");
        }
        ps.println("Summary: totalMemory " + totalMemory + " totalCores " + totalCores);
        List<QueueInfo> qInfo = yarnClient.getAllQueues();
        for (QueueInfo q : qInfo) {
            ps.println("Queue: " + q.getQueueName() + ", Current Capacity: " + q.getCurrentCapacity()
                    + " Max Capacity: " + q.getMaximumCapacity() + " Applications: "
                    + q.getApplications().size());
        }
        return baos.toString();
    } catch (Exception e) {
        throw new RuntimeException("Couldn't get cluster description", e);
    } finally {
        // Previously the client was only stopped on success; stop it on failure too so a
        // failed lookup does not leave it running.
        if (yarnClient != null) {
            yarnClient.stop();
        }
    }
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>start a YARN session and wait for the "TaskManagers changed to 1" message;</li>
 *   <li>query the web interface via the application's tracking URL and check the TaskManager
 *       list, the dynamic properties, the JobManager host/port and the log endpoint;</li>
 *   <li>locate the TaskManager container on the NodeManagers and check the vcores in use;</li>
 *   <li>stop that container and wait until the captured stderr shows a kill followed by a
 *       new TaskManager launch;</li>
 *   <li>stop the session, restore stdout/stderr and assert on the captured stderr.</li>
 * </ol>
 */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
                    "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy",
                    "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    // NOTE(review): presumably one container for the JobManager/ApplicationMaster plus one
    // TaskManager ("-n 1") — confirm against getRunningContainers().
    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------

    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // Normalise the tracking URL: ensure a trailing slash and an http:// scheme prefix.
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        // "-s 3" was passed on the command line above.
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // Iterates over every match, so hostname/port end up holding the LAST occurrence.
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        // Any failure above (including assertion errors) is logged and turned into a test
        // failure carrying only the message.
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    // Scan the containers of every NodeManager for one whose launch command mentions the
    // TaskManager class; there is no early exit, so the last match found is kept.
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    // NOTE(review): the client is only stopped on this path; an assertion failure above
    // leaves it running.
    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    // The loop itself has no iteration bound; the @Test timeout above is what ends it if the
    // expected output never shows up.
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // Only look at the output from the kill message onwards, so a launch message
            // found there necessarily comes after the kill.
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));

    Assert.assertTrue("Expect to see failed container",
            eC.contains("Container killed by the ApplicationMaster"));

    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}