List of usage examples for org.apache.hadoop.yarn.client.api YarnClient getNodeReports
public abstract List<NodeReport> getNodeReports(NodeState... states) throws YarnException, IOException;
Gets a report of nodes (NodeReport) in the cluster.
From source file:alluxio.yarn.YarnUtils.java
License:Apache License
/** * Returns the host names for all nodes in yarnClient's YARN cluster. * * @param yarnClient the client to use to look up node information * @return the set of host names/*from w ww. j av a 2s . c o m*/ * @throws YarnException if an error occurs within YARN * @throws IOException if an error occurs in YARN's underlying IO */ public static Set<String> getNodeHosts(YarnClient yarnClient) throws YarnException, IOException { ImmutableSet.Builder<String> nodeHosts = ImmutableSet.builder(); for (NodeReport runningNode : yarnClient.getNodeReports(USABLE_NODE_STATES)) { nodeHosts.add(runningNode.getNodeId().getHost()); } return nodeHosts.build(); }
From source file:cn.edu.buaa.act.petuumOnYarn.ApplicationMaster.java
License:Apache License
/**
 * Checks whether the cluster has enough RUNNING nodes that can each host one container.
 *
 * <p>A node qualifies when its reported capability has at least {@code containerMemory} memory
 * and {@code containerVirtualCores} virtual cores. Qualifying nodes are stored in
 * {@code avaliableNodeList}; on success {@code numTotalContainers} is set to {@code numNodes}.
 *
 * @return {@code true} if at least {@code numNodes} qualifying nodes were found; {@code false}
 *     if there are too few nodes, too few qualifying nodes, or the lookup failed
 */
private boolean getAvaliableNodes() {
  YarnClient yarnClient = null;
  try {
    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    for (NodeReport node : clusterNodeReports) {
      LOG.info("node infos:" + node.getHttpAddress());
    }

    avaliableNodeList = new ArrayList<NodeReport>();
    if (numNodes > clusterNodeReports.size()) {
      LOG.error("cluster nodes isn't enough");
      return false;
    }

    for (NodeReport node : clusterNodeReports) {
      if (node.getCapability().getMemory() >= containerMemory
          && node.getCapability().getVirtualCores() >= containerVirtualCores) {
        avaliableNodeList.add(node);
      }
    }

    if (avaliableNodeList.size() < numNodes) {
      LOG.error("Resource isn't enough");
      return false;
    }
    numTotalContainers = numNodes;
    return true;
  } catch (Exception e) {
    // Pass the throwable itself so the logger prints the full stack trace; logging
    // e.getStackTrace() directly would only print the array's identity string.
    LOG.error(e.getMessage(), e);
    return false;
  } finally {
    // The client is only needed for this one query; stop it on every exit path so its
    // connection to the ResourceManager is not leaked.
    if (yarnClient != null) {
      yarnClient.stop();
    }
  }
}
From source file:com.cloudera.llama.am.MiniLlama.java
License:Apache License
private List<NodeId> getYarnNodeIds(Configuration conf) throws Exception { List<NodeId> list = new ArrayList<NodeId>(); if (miniYarn != null) { int clusterNodes = getConf().getInt(MINI_CLUSTER_NODES_KEY, 1); for (int i = 0; i < clusterNodes; i++) { list.add(miniYarn.getNodeManager(i).getNMContext().getNodeId()); }//from w w w. ja v a 2 s . c o m } else { YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING); for (int i = 0; i < nodes.size(); i++) { list.add(nodes.get(i).getNodeId()); } yarnClient.stop(); } return list; }
From source file:com.flyhz.avengers.framework.application.InitEnvApplication.java
License:Apache License
/**
 * Requests one container per distinct host that currently has a RUNNING node.
 *
 * <p>The hosts are added to {@code nodeSet}; {@code numTotalContainers} and
 * {@code numRequestedContainers} are then set to the size of that set. YARN or I/O failures
 * while querying the cluster are logged and otherwise ignored.
 */
private void initJar() {
  LOG.info("initJar");
  // The former "numTotalContainers == 0" guard directly after this assignment could never
  // trigger, so it has been dropped.
  numTotalContainers = 1;
  containerMemory = 10;
  requestPriority = 0;

  YarnClient yarnClient = YarnClient.createYarnClient();
  try {
    // init/start are inside the guarded region so a failure there still closes the client.
    yarnClient.init(conf);
    yarnClient.start();

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got all node!");
    for (NodeReport node : clusterNodeReports) {
      nodeSet.add(node.getNodeId().getHost());
    }
    LOG.info("initJar Completed setting up app master command {}", initJarCmd);

    numTotalContainers = nodeSet.size();
    for (int i = 0; i < numTotalContainers; i++) {
      ContainerRequest containerAsk = setupContainerAskForRM();
      amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
  } catch (YarnException e) {
    LOG.error("initJarAndClasspath", e);
  } catch (IOException e) {
    LOG.error("initJarAndClasspath", e);
  } finally {
    try {
      yarnClient.close();
    } catch (IOException e) {
      // Closing is best effort, but a failure should at least be visible in the log.
      LOG.warn("Failed to close YarnClient", e);
    }
  }
}
From source file:com.flyhz.avengers.framework.AvengersAppMaster.java
License:Apache License
private void initJar() { LOG.info("initJar"); initCommon();//from w w w.j av a2s .c om this.currentProcess = "initJar"; YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); List<NodeReport> clusterNodeReports; try { clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got all node!"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); nodeSet.add(node.getNodeId().getHost()); } Vector<CharSequence> vargs = new Vector<CharSequence>(30); String appTempDir = conf.get("hadoop.tmp.dir"); FileSystem fs = DistributedFileSystem.get(conf); String hdfsJar = fs.getHomeDirectory() + "/avengers/" + this.appAttemptID.getApplicationId().getId() + "/AvengersAppMaster.jar"; vargs.add("if [ ! -e " + appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + "/AvengersAppMaster.jar" + " ];"); vargs.add("then"); vargs.add("/bin/mkdir -p"); vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";"); vargs.add(Environment.HADOOP_YARN_HOME.$() + "/bin/hadoop"); vargs.add("fs"); vargs.add("-copyToLocal"); vargs.add(hdfsJar); vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";"); vargs.add("fi"); // Add log redirect params vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } initJarCmd = command.toString(); LOG.info("initJar Completed setting up app master command {}", initJarCmd); numTotalContainers = nodeSet.size(); for (int i = 0; i < numTotalContainers; i++) { ContainerRequest containerAsk = setupContainerAskForRM(); 
amRMClient.addContainerRequest(containerAsk); } numRequestedContainers.set(numTotalContainers); while (!done && (numCompletedContainers.get() != numTotalContainers)) { try { Thread.sleep(200); } catch (InterruptedException ex) { } } } catch (YarnException e) { LOG.error("initJarAndClasspath", e); } catch (IOException e) { LOG.error("initJarAndClasspath", e); } finally { try { yarnClient.close(); } catch (IOException e) { } } }
From source file:eu.stratosphere.yarn.Client.java
License:Apache License
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient) throws YarnException, IOException { ClusterResourceDescription crd = new ClusterResourceDescription(); crd.totalFreeMemory = 0;//w ww .j av a 2 s. com crd.containerLimit = 0; List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING); for (NodeReport rep : nodes) { int free = rep.getCapability().getMemory() - (rep.getUsed() != null ? rep.getUsed().getMemory() : 0); crd.totalFreeMemory += free; if (free > crd.containerLimit) { crd.containerLimit = free; } } return crd; }
From source file:eu.stratosphere.yarn.Client.java
License:Apache License
/**
 * Prints an overview of the cluster to standard output and terminates the JVM.
 *
 * <p>The output lists the number of node managers, a small table per RUNNING node, the summed
 * memory and virtual cores, and one line per queue. Afterwards the client is stopped and
 * {@code System.exit(0)} is called, so this method does not return normally.
 *
 * @param yarnClient client used to query metrics, nodes and queues
 * @throws YarnException if YARN reports an error
 * @throws IOException if communication with YARN fails
 */
private void showClusterMetrics(YarnClient yarnClient) throws YarnException, IOException {
  YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
  System.out.println("NodeManagers in the Cluster " + clusterMetrics.getNumNodeManagers());

  List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);
  final String rowFormat = "|%-16s |%-16s %n";
  final String separator = "+---------------------------------------+";
  System.out.printf("|Property |Value %n");
  System.out.println(separator);

  int memorySum = 0;
  int coreSum = 0;
  for (NodeReport report : runningNodes) {
    final Resource capability = report.getCapability();
    memorySum += capability.getMemory();
    coreSum += capability.getVirtualCores();

    System.out.format(rowFormat, "NodeID", report.getNodeId());
    System.out.format(rowFormat, "Memory", capability.getMemory() + " MB");
    System.out.format(rowFormat, "vCores", capability.getVirtualCores());
    System.out.format(rowFormat, "HealthReport", report.getHealthReport());
    System.out.format(rowFormat, "Containers", report.getNumContainers());
    System.out.println(separator);
  }
  System.out.println("Summary: totalMemory " + memorySum + " totalCores " + coreSum);

  for (QueueInfo queue : yarnClient.getAllQueues()) {
    String queueLine = "Queue: " + queue.getQueueName()
        + ", Current Capacity: " + queue.getCurrentCapacity()
        + " Max Capacity: " + queue.getMaximumCapacity()
        + " Applications: " + queue.getApplications().size();
    System.out.println(queueLine);
  }

  yarnClient.stop();
  System.exit(0);
}
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/**
 * Summarises the unused memory of all RUNNING nodes.
 *
 * <p>For each node the free memory is its capability minus its used memory (a missing usage
 * report counts as zero). The result is built from the sum over all nodes, the largest single
 * node value, and the per-node values in report order.
 *
 * @param yarnClient client used to fetch the node reports
 * @return description of the free memory in the cluster
 * @throws YarnException if YARN reports an error
 * @throws IOException if communication with YARN fails
 */
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
    throws YarnException, IOException {
  List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);

  int[] freePerNode = new int[runningNodes.size()];
  int freeSum = 0;
  int largestFree = 0;

  int slot = 0;
  for (NodeReport report : runningNodes) {
    int capacity = report.getCapability().getMemory();
    int inUse = 0;
    if (report.getUsed() != null) {
      inUse = report.getUsed().getMemory();
    }
    int available = capacity - inUse;

    freePerNode[slot++] = available;
    freeSum += available;
    if (available > largestFree) {
      largestFree = available;
    }
  }
  return new ClusterResourceDescription(freeSum, largestFree, freePerNode);
}
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/**
 * Renders an overview of the cluster as text.
 *
 * <p>The text lists the number of node managers, a small table per RUNNING node, the summed
 * memory and virtual cores, and one line per queue. The client obtained from
 * {@code getYarnClient()} is stopped before the text is returned.
 *
 * @return the rendered cluster description
 * @throws RuntimeException wrapping any failure that occurs while collecting the data
 */
@Override
public String getClusterDescription() {
  try {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(buffer);

    YarnClient yarnClient = getYarnClient();
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    out.append("NodeManagers in the ClusterClient " + clusterMetrics.getNumNodeManagers());

    List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);
    final String rowFormat = "|%-16s |%-16s %n";
    final String separator = "+---------------------------------------+";
    out.printf("|Property |Value %n");
    out.println(separator);

    int memorySum = 0;
    int coreSum = 0;
    for (NodeReport report : runningNodes) {
      final Resource capability = report.getCapability();
      memorySum += capability.getMemory();
      coreSum += capability.getVirtualCores();

      out.format(rowFormat, "NodeID", report.getNodeId());
      out.format(rowFormat, "Memory", capability.getMemory() + " MB");
      out.format(rowFormat, "vCores", capability.getVirtualCores());
      out.format(rowFormat, "HealthReport", report.getHealthReport());
      out.format(rowFormat, "Containers", report.getNumContainers());
      out.println(separator);
    }
    out.println("Summary: totalMemory " + memorySum + " totalCores " + coreSum);

    for (QueueInfo queue : yarnClient.getAllQueues()) {
      String queueLine = "Queue: " + queue.getQueueName()
          + ", Current Capacity: " + queue.getCurrentCapacity()
          + " Max Capacity: " + queue.getMaximumCapacity()
          + " Applications: " + queue.getApplications().size();
      out.println(queueLine);
    }

    yarnClient.stop();
    return buffer.toString();
  } catch (Exception e) {
    throw new RuntimeException("Couldn't get cluster description", e);
  }
}
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>The test proceeds in these phases:
 * <ol>
 *   <li>start a YARN session with one TaskManager, three slots and two vcores;</li>
 *   <li>query the JobManager web interface (task managers, configuration, log);</li>
 *   <li>locate the TaskManager container on the NodeManagers and check the used vcores;</li>
 *   <li>stop that container and wait until the captured stderr shows a container being
 *       killed followed by a new TaskManager launch;</li>
 *   <li>stop the session runner, restore stdout/stderr and assert on the captured output.</li>
 * </ol>
 */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
	LOG.info("Starting testTaskManagerFailure()");
	Runner runner = startWithArgs(
			new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
					"1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
					"-nm", "customName", "-Dfancy-configuration-value=veryFancy",
					"-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
			"Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

	Assert.assertEquals(2, getRunningContainers());

	// ------------------------ Test if JobManager web interface is accessible -------

	YarnClient yc = null;
	try {
		yc = YarnClient.createYarnClient();
		yc.init(yarnConfiguration);
		yc.start();

		List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
		Assert.assertEquals(1, apps.size()); // Only one running
		ApplicationReport app = apps.get(0);
		Assert.assertEquals("customName", app.getName());
		// Normalise the tracking URL: ensure a trailing slash and an explicit http:// scheme
		// before appending REST paths to it.
		String url = app.getTrackingUrl();
		if (!url.endsWith("/")) {
			url += "/";
		}
		if (!url.startsWith("http://")) {
			url = "http://" + url;
		}
		LOG.info("Got application URL from YARN {}", url);

		String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

		JsonNode parsedTMs = new ObjectMapper().readTree(response);
		ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
		Assert.assertNotNull(taskManagers);
		Assert.assertEquals(1, taskManagers.size());
		Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

		// get the configuration from webinterface & check if the dynamic properties from YARN show up there.
		String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
		Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

		Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
		Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
		Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

		// -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
		// first, get the hostname/port
		String oC = outContent.toString();
		Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
		Matcher matches = p.matcher(oC);
		String hostname = null;
		String port = null;
		// Iterates over every match, so the values of the LAST occurrence in the output win.
		while (matches.find()) {
			hostname = matches.group(1).toLowerCase();
			port = matches.group(2);
		}
		LOG.info("Extracted hostname:port: {} {}", hostname, port);

		Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
				parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
		Assert.assertEquals("unable to find port in " + jsonConfig, port,
				parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

		// test logfile access
		String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
		Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
		Assert.assertTrue(logs.contains("Starting JobManager"));
		Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
	} catch (Throwable e) {
		// Any failure above (including assertion errors, which are Throwables) is logged and
		// re-reported through Assert.fail with only the message.
		LOG.warn("Error while running test", e);
		Assert.fail(e.getMessage());
	}

	// ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

	// find container id of taskManager:
	ContainerId taskManagerContainer = null;
	NodeManager nodeManager = null;
	UserGroupInformation remoteUgi = null;
	NMTokenIdentifier nmIdent = null;
	try {
		remoteUgi = UserGroupInformation.getCurrentUser();
	} catch (IOException e) {
		LOG.warn("Unable to get curr user", e);
		Assert.fail();
	}
	// Scan the containers of every NodeManager for one whose launch command mentions the
	// TaskManager class. The loop does not break on a hit, so the last match is kept.
	for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
		NodeManager nm = yarnCluster.getNodeManager(nmId);
		ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
		for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
			String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
			if (command.contains(YarnTaskManager.class.getSimpleName())) {
				taskManagerContainer = entry.getKey();
				nodeManager = nm;
				nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
				// allow myself to do stuff with the container
				// remoteUgi.addCredentials(entry.getValue().getCredentials());
				remoteUgi.addTokenIdentifier(nmIdent);
			}
		}
		sleep(500);
	}

	Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
	Assert.assertNotNull("Illegal state", nodeManager);

	try {
		List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

		// we asked for one node with 2 vcores so we expect 2 vcores
		int userVcores = 0;
		for (NodeReport rep : nodeReports) {
			userVcores += rep.getUsed().getVirtualCores();
		}
		Assert.assertEquals(2, userVcores);
	} catch (Exception e) {
		Assert.fail("Test failed: " + e.getMessage());
	}

	yc.stop();

	List<ContainerId> toStop = new LinkedList<ContainerId>();
	toStop.add(taskManagerContainer);
	StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

	try {
		nodeManager.getNMContext().getContainerManager().stopContainers(scr);
	} catch (Throwable e) {
		LOG.warn("Error stopping container", e);
		Assert.fail("Error stopping container: " + e.getMessage());
	}

	// stateful termination check:
	// wait until we saw a container being killed and AFTERWARDS a new one launched
	// NOTE: the loop has no iteration bound of its own; it only ends when the expected output
	// appears or the @Test timeout aborts the test.
	boolean ok = false;
	do {
		LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

		String o = errContent.toString();
		int killedOff = o.indexOf("Container killed by the ApplicationMaster");
		if (killedOff != -1) {
			// Search only the text from the kill message onwards; "> 0" requires the launch
			// message to appear after the start of that remainder.
			o = o.substring(killedOff);
			ok = o.indexOf("Launching TaskManager") > 0;
		}
		sleep(1000);
	} while (!ok);

	// send "stop" command to command line interface
	runner.sendStop();
	// wait for the thread to stop
	try {
		runner.join(1000);
	} catch (InterruptedException e) {
		LOG.warn("Interrupted while stopping runner", e);
	}
	LOG.warn("stopped");

	// ----------- Send output to logger
	System.setOut(originalStdout);
	System.setErr(originalStderr);
	String oC = outContent.toString();
	String eC = errContent.toString();
	LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
	LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

	// ------ Check if everything happened correctly
	Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
	Assert.assertTrue("Expect to see failed container",
			eC.contains("Container killed by the ApplicationMaster"));
	Assert.assertTrue("Expect to see new container started",
			eC.contains("Launching TaskManager") && eC.contains("on host"));

	// cleanup auth for the subsequent tests.
	remoteUgi.getTokenIdentifiers().remove(nmIdent);

	LOG.info("Finished testTaskManagerFailure()");
}