List of usage examples for org.apache.hadoop.yarn.api.records NodeReport getUsed
@Public @Stable public abstract Resource getUsed();
Returns the used Resource on the node.
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/**
 * Summarises the memory that is not yet in use on the running NodeManagers.
 *
 * <p>For every node in state {@code RUNNING} the free amount is the node's capability
 * memory minus its used memory; a node without a usage record counts as unused.
 *
 * @param yarnClient client used to fetch the node reports
 * @return a description holding the summed free memory, the largest free amount found
 *         on a single node, and the per-node free amounts in report order
 * @throws YarnException if the node reports cannot be obtained
 * @throws IOException if the node reports cannot be obtained
 */
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
        throws YarnException, IOException {
    final List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);
    final int[] freePerNode = new int[runningNodes.size()];

    int freeSum = 0;
    int largestFree = 0;
    int slot = 0;
    for (NodeReport report : runningNodes) {
        int usedMemory = 0;
        if (report.getUsed() != null) {
            usedMemory = report.getUsed().getMemory();
        }
        final int freeMemory = report.getCapability().getMemory() - usedMemory;

        freePerNode[slot++] = freeMemory;
        freeSum += freeMemory;
        // the biggest single-node gap bounds the largest container that can still be placed
        largestFree = Math.max(largestFree, freeMemory);
    }

    return new ClusterResourceDescription(freeSum, largestFree, freePerNode);
}
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>The test proceeds in this order:
 * <ol>
 *   <li>start a YARN session with one TaskManager, three slots and two vcores;</li>
 *   <li>query the JobManager web interface (task managers, configuration, log) through the
 *       tracking URL reported by YARN;</li>
 *   <li>locate the container whose launch command mentions {@code YarnTaskManager};</li>
 *   <li>check that the running nodes report two used vcores in total;</li>
 *   <li>stop the TaskManager container and wait until stderr shows a kill followed by a
 *       new TaskManager launch;</li>
 *   <li>stop the runner, restore stdout/stderr and assert on the captured output.</li>
 * </ol>
 */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);
    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // normalise the tracking URL so that relative paths can simply be appended
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);
        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // keeps the values of the LAST match found in the captured stdout
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);
        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        // any failure above (including assertion errors) is logged and reported as a test failure
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            // the launch command is the only place the container's role is visible here
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        // pause 500 ms after scanning each NodeManager
        sleep(500);
    }
    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);
    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);
        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            // NOTE(review): getUsed() is dereferenced without a null check; a null here would
            // surface through the catch below as "Test failed: ..." - confirm that is intended
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }
    // NOTE(review): the client is only stopped when execution reaches this line
    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // only look at output that follows the kill message
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok); // no exit condition of its own; bounded only by the @Test timeout

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);
    LOG.info("Finished testTaskManagerFailure()");
}
From source file:org.apache.sysml.yarn.ropt.YarnClusterAnalyzer.java
License:Apache License
public static double getClusterUtilization() throws IOException { double util = 0; try {/* w w w . java 2s . c o m*/ if (_client == null) _client = createYarnClient(); List<NodeReport> nodesReport = _client.getNodeReports(); double maxMem = 0; double currMem = 0; long maxCores = 0; long currCores = 0; for (NodeReport node : nodesReport) { Resource max = node.getCapability(); Resource used = node.getUsed(); maxMem += max.getMemory(); currMem += used.getMemory(); maxCores += max.getVirtualCores(); currCores += used.getVirtualCores(); } util = Math.max(Math.min(1, currMem / maxMem), //memory util Math.min(1, (double) currCores / maxCores)); //vcore util } catch (Exception ex) { throw new IOException(ex); } return util; }
From source file:org.apache.sysml.yarn.ropt.YarnClusterAnalyzer.java
License:Apache License
/** * Analyzes properties of Yarn cluster and Hadoop configurations. * /*from w w w. jav a 2 s . c o m*/ * @param yarnClient hadoop yarn client * @param conf hadoop yarn configuration * @param verbose output info to standard output */ public static void analyzeYarnCluster(YarnClient yarnClient, YarnConfiguration conf, boolean verbose) { try { List<NodeReport> nodesReport = yarnClient.getNodeReports(); if (verbose) System.out.println("There are " + nodesReport.size() + " nodes in the cluster"); if (nodesReport.isEmpty()) throw new YarnException("There are zero available nodes in the yarn cluster"); nodesMaxPhySorted = new ArrayList<>(nodesReport.size()); clusterTotalMem = 0; clusterTotalCores = 0; clusterTotalNodes = 0; minimumMRContainerPhyMB = -1; for (NodeReport node : nodesReport) { Resource resource = node.getCapability(); Resource used = node.getUsed(); if (used == null) used = Resource.newInstance(0, 0); int mb = resource.getMemory(); int cores = resource.getVirtualCores(); if (mb <= 0) throw new YarnException("A node has non-positive memory " + mb); int myMinMRPhyMB = mb / cores / CPU_HYPER_FACTOR; if (minimumMRContainerPhyMB < myMinMRPhyMB) minimumMRContainerPhyMB = myMinMRPhyMB; // minimumMRContainerPhyMB needs to be the largest among the mins clusterTotalMem += (long) mb * 1024 * 1024; nodesMaxPhySorted.add((long) mb * 1024 * 1024); clusterTotalCores += cores; clusterTotalNodes++; if (verbose) System.out.println("\t" + node.getNodeId() + " has " + mb + " MB (" + used.getMemory() + " MB used) memory and " + resource.getVirtualCores() + " (" + used.getVirtualCores() + " used) cores"); } Collections.sort(nodesMaxPhySorted, Collections.reverseOrder()); nodesMaxBudgetSorted = new ArrayList<>(nodesMaxPhySorted.size()); for (int i = 0; i < nodesMaxPhySorted.size(); i++) nodesMaxBudgetSorted.add(ResourceOptimizer.phyToBudget(nodesMaxPhySorted.get(i))); _remotePar = nodesReport.size(); if (_remotePar == 0) throw new YarnException("There are no available nodes in 
the yarn cluster"); // Now get the default cluster settings _remoteMRSortMem = (1024 * 1024) * conf.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB, 100); //100MB //handle jvm max mem (map mem budget is relevant for map-side distcache and parfor) //(for robustness we probe both: child and map configuration parameters) String javaOpts1 = conf.get(MRConfigurationNames.MR_CHILD_JAVA_OPTS); //internally mapred/mapreduce synonym String javaOpts2 = conf.get(MRConfigurationNames.MR_MAP_JAVA_OPTS, null); //internally mapred/mapreduce synonym String javaOpts3 = conf.get(MRConfigurationNames.MR_REDUCE_JAVA_OPTS, null); //internally mapred/mapreduce synonym if (javaOpts2 != null) //specific value overrides generic _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts2); else _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts1); if (javaOpts3 != null) //specific value overrides generic _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts3); else _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1); //HDFS blocksize String blocksize = conf.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728"); _blocksize = Long.parseLong(blocksize); minimalPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); maximumPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB); mrAMPhy = (long) conf.getInt(MRConfigurationNames.YARN_APP_MR_AM_RESOURCE_MB, 1536) * 1024 * 1024; } catch (Exception e) { throw new RuntimeException("Unable to analyze yarn cluster ", e); } /* * This is for AppMaster to query available resource in the cluster during heartbeat * AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); AllocateResponse response = rmClient.allocate(0); int nodeCount = response.getNumClusterNodes(); Resource resource = 
response.getAvailableResources(); List<NodeReport> nodeUpdate = response.getUpdatedNodes(); LOG.info("This is a " + nodeCount + " node cluster with totally " + resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores"); LOG.info(nodereport.size() + " updatedNode reports received"); for (NodeReport node : nodeUpdate) { resource = node.getCapability(); LOG.info(node.getNodeId() + " updated with " + resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores"); }*/ }
From source file:org.apache.twill.yarn.PlacementPolicyTestRun.java
License:Apache License
/** * Verify the cluster configuration (number and capability of node managers) required for the tests. *///from ww w.java 2 s.co m @BeforeClass public static void verifyClusterCapability() throws InterruptedException { // Ignore verifications if it is running against older Hadoop versions which does not support blacklists. Assume.assumeTrue(YarnUtils.getHadoopVersion().equals(YarnUtils.HadoopVersions.HADOOP_22)); // All runnables in this test class use same resource specification for the sake of convenience. resource = ResourceSpecification.Builder.with().setVirtualCores(RUNNABLE_CORES) .setMemory(RUNNABLE_MEMORY, ResourceSpecification.SizeUnit.MEGA).build(); twoInstancesResource = ResourceSpecification.Builder.with().setVirtualCores(RUNNABLE_CORES) .setMemory(RUNNABLE_MEMORY, ResourceSpecification.SizeUnit.MEGA).setInstances(2).build(); // The tests need exactly three NodeManagers in the cluster. int trials = 0; while (trials++ < 20) { try { nodeReports = TWILL_TESTER.getNodeReports(); if (nodeReports != null && nodeReports.size() == 3) { break; } } catch (Exception e) { LOG.error("Failed to get node reports", e); } LOG.warn("NodeManagers != 3. {}", nodeReports); TimeUnit.SECONDS.sleep(1); } // All NodeManagers should have enough capacity available to accommodate at least two runnables. for (NodeReport nodeReport : nodeReports) { Resource capability = nodeReport.getCapability(); Resource used = nodeReport.getUsed(); Assert.assertNotNull(capability); if (used != null) { Assert.assertTrue(2 * resource.getMemorySize() < capability.getMemory() - used.getMemory()); } else { Assert.assertTrue(2 * resource.getMemorySize() < capability.getMemory()); } } }
From source file:org.apache.twill.yarn.PlacementPolicyTestRun.java
License:Apache License
/** * Helper function to verify DISTRIBUTED placement policies. * Returns the number of NodeManagers on which runnables got provisioned. * @return number of NodeManagers on which runnables got provisioned. *///from w ww. j av a 2s. c o m private int getProvisionedNodeManagerCount() throws Exception { int provisionedNodeManagerCount = 0; for (NodeReport nodeReport : getNodeReports()) { Resource used = nodeReport.getUsed(); if (used != null && used.getMemory() > 0) { provisionedNodeManagerCount++; } } return provisionedNodeManagerCount; }
From source file:org.huahinframework.manager.rest.service.ApplicationService.java
License:Apache License
/**
 * Returns cluster-level information as JSON: the number of NodeManagers and one entry
 * per node report (HTTP address, container count, rack, capability memory, health
 * status, node id, node state and used resource).
 *
 * <p>If anything fails while collecting the data, the error is logged and the response
 * consists solely of a status entry describing the failure.
 *
 * @return the cluster description, or a status-only object on failure
 */
@Path("/cluster")
@GET
@Produces(MediaType.APPLICATION_JSON)
public JSONObject getCluster() {
    JSONObject jsonObject = new JSONObject();
    try {
        GetClusterMetricsRequest metricsRequest = recordFactory
                .newRecordInstance(GetClusterMetricsRequest.class);
        GetClusterMetricsResponse metricsResponse = applicationsManager.getClusterMetrics(metricsRequest);
        jsonObject.put(Response.NUM_NODE_MANAGERS, metricsResponse.getClusterMetrics().getNumNodeManagers());

        GetClusterNodesRequest nodeRequest = recordFactory.newRecordInstance(GetClusterNodesRequest.class);
        GetClusterNodesResponse nodeResponse = applicationsManager.getClusterNodes(nodeRequest);

        List<JSONObject> reports = new ArrayList<JSONObject>();
        for (NodeReport report : nodeResponse.getNodeReports()) {
            JSONObject nr = new JSONObject();
            nr.put(Response.HTTP_ADDRESS, report.getHttpAddress());
            nr.put(Response.NUM_CONTAINERS, report.getNumContainers());
            nr.put(Response.RACK_NAME, report.getRackName());
            nr.put(Response.CAPABILITY, report.getCapability().getMemory());
            nr.put(Response.HEALTH_REPORT, report.getNodeHealthStatus().getHealthReport());
            nr.put(Response.IS_NODE_HEALTHY, report.getNodeHealthStatus().getIsNodeHealthy());
            nr.put(Response.LAST_HEALTH_REPORT_TIME,
                    new Date(report.getNodeHealthStatus().getLastHealthReportTime()));
            nr.put(Response.NODE_ID, report.getNodeId());
            nr.put(Response.NODE_STATE, report.getNodeState());
            nr.put(Response.USED, report.getUsed());
            reports.add(nr);
        }
        jsonObject.put(Response.NODES, reports);
    } catch (Exception e) {
        // log once, with the stack trace, through the logger instead of stderr
        log.error("Failed to collect cluster information", e);
        Map<String, String> status = new HashMap<String, String>();
        // some exceptions carry no message; fall back to the exception's string form
        String message = e.getMessage();
        status.put(Response.STATUS, message != null ? message : e.toString());
        jsonObject = new JSONObject(status);
    }
    return jsonObject;
}
From source file:runtime.starter.MPJYarnClient.java
License:Open Source License
public void run() throws Exception { Map<String, String> map = System.getenv(); try {// ww w .java 2s . c o m mpjHomeDir = map.get("MPJ_HOME"); if (mpjHomeDir == null) { throw new Exception("[MPJRun.java]:MPJ_HOME environment found.."); } } catch (Exception exc) { System.out.println("[MPJRun.java]:" + exc.getMessage()); exc.printStackTrace(); return; } // Copy the application master jar to HDFS // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); /* Path dataset = new Path(fs.getHomeDirectory(),"/dataset"); FileStatus datasetFile = fs.getFileStatus(dataset); BlockLocation myBlocks [] = fs.getFileBlockLocations(datasetFile,0,datasetFile.getLen()); for(BlockLocation b : myBlocks){ System.out.println("\n--------------------"); System.out.println("Length "+b.getLength()); for(String host : b.getHosts()){ System.out.println("host "+host); } } */ Path source = new Path(mpjHomeDir + "/lib/mpj-app-master.jar"); String pathSuffix = hdfsFolder + "mpj-app-master.jar"; Path dest = new Path(fs.getHomeDirectory(), pathSuffix); if (debugYarn) { logger.info("Uploading mpj-app-master.jar to: " + dest.toString()); } fs.copyFromLocalFile(false, true, source, dest); FileStatus destStatus = fs.getFileStatus(dest); Path wrapperSource = new Path(mpjHomeDir + "/lib/mpj-yarn-wrapper.jar"); String wrapperSuffix = hdfsFolder + "mpj-yarn-wrapper.jar"; Path wrapperDest = new Path(fs.getHomeDirectory(), wrapperSuffix); if (debugYarn) { logger.info("Uploading mpj-yarn-wrapper.jar to: " + wrapperDest.toString()); } fs.copyFromLocalFile(false, true, wrapperSource, wrapperDest); Path userJar = new Path(jarPath); String userJarSuffix = hdfsFolder + "user-code.jar"; Path userJarDest = new Path(fs.getHomeDirectory(), userJarSuffix); if (debugYarn) { logger.info("Uploading user-code.jar to: " + userJarDest.toString()); } fs.copyFromLocalFile(false, true, userJar, userJarDest); YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = 
YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); if (debugYarn) { YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics(); logger.info("\nNodes Information"); logger.info("Number of NM: " + metrics.getNumNodeManagers() + "\n"); List<NodeReport> nodeReports = yarnClient.getNodeReports(NodeState.RUNNING); for (NodeReport n : nodeReports) { logger.info("NodeId: " + n.getNodeId()); logger.info("RackName: " + n.getRackName()); logger.info("Total Memory: " + n.getCapability().getMemory()); logger.info("Used Memory: " + n.getUsed().getMemory()); logger.info("Total vCores: " + n.getCapability().getVirtualCores()); logger.info("Used vCores: " + n.getUsed().getVirtualCores() + "\n"); } } logger.info("Creating server socket at HOST " + serverName + " PORT " + serverPort + " \nWaiting for " + np + " processes to connect..."); // Creating a server socket for incoming connections try { servSock = new ServerSocket(serverPort); infoSock = new ServerSocket(); TEMP_PORT = findPort(infoSock); } catch (Exception e) { e.printStackTrace(); } // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); int maxMem = appResponse.getMaximumResourceCapability().getMemory(); if (debugYarn) { logger.info("Max memory capability resources in cluster: " + maxMem); } if (amMem > maxMem) { amMem = maxMem; logger.info("AM memory specified above threshold of cluster " + "Using maximum memory for AM container: " + amMem); } int maxVcores = appResponse.getMaximumResourceCapability().getVirtualCores(); if (debugYarn) { logger.info("Max vCores capability resources in cluster: " + maxVcores); } if (amCores > maxVcores) { amCores = maxVcores; logger.info("AM virtual cores specified above threshold of cluster " + "Using maximum virtual cores for AM container: " + amCores); } // Set up the container launch context for the application master ContainerLaunchContext 
amContainer = Records.newRecord(ContainerLaunchContext.class); List<String> commands = new ArrayList<String>(); commands.add("$JAVA_HOME/bin/java"); commands.add("-Xmx" + amMem + "m"); commands.add("runtime.starter.MPJAppMaster"); commands.add("--np"); commands.add(String.valueOf(np)); commands.add("--serverName"); commands.add(serverName); //server name commands.add("--ioServerPort"); commands.add(Integer.toString(serverPort)); //server port commands.add("--deviceName"); commands.add(deviceName); //device name commands.add("--className"); commands.add(className); //class name commands.add("--wdir"); commands.add(workingDirectory); //wdir commands.add("--psl"); commands.add(Integer.toString(psl)); //protocol switch limit commands.add("--wireUpPort"); commands.add(String.valueOf(TEMP_PORT)); //for sharing ports & rank commands.add("--wrapperPath"); commands.add(wrapperDest.toString());//MPJYarnWrapper.jar HDFS path commands.add("--userJarPath"); commands.add(userJarDest.toString());//User Jar File HDFS path commands.add("--mpjContainerPriority"); commands.add(mpjContainerPriority);// priority for mpj containers commands.add("--containerMem"); commands.add(containerMem); commands.add("--containerCores"); commands.add(containerCores); if (debugYarn) { commands.add("--debugYarn"); } if (appArgs != null) { commands.add("--appArgs"); for (int i = 0; i < appArgs.length; i++) { commands.add(appArgs[i]); } } amContainer.setCommands(commands); //set commands // Setup local Resource for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(dest)); appMasterJar.setSize(destStatus.getLen()); appMasterJar.setTimestamp(destStatus.getModificationTime()); appMasterJar.setType(LocalResourceType.ARCHIVE); appMasterJar.setVisibility(LocalResourceVisibility.APPLICATION); amContainer.setLocalResources(Collections.singletonMap("mpj-app-master.jar", appMasterJar)); // Setup CLASSPATH for 
ApplicationMaster // Setting up the environment Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMem); capability.setVirtualCores(amCores); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName(appName); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue(yarnQueue); // queue Priority priority = Priority.newInstance(amPriority); appContext.setPriority(priority); ApplicationId appId = appContext.getApplicationId(); //Adding ShutDown Hook Runtime.getRuntime().addShutdownHook(new KillYarnApp(appId, yarnClient)); // Submit application System.out.println("Submitting Application: " + appContext.getApplicationName() + "\n"); try { isRunning = true; yarnClient.submitApplication(appContext); } catch (Exception exp) { System.err.println("Error Submitting Application"); exp.printStackTrace(); } // np = number of processes , + 1 for Application Master container IOMessagesThread[] ioThreads = new IOMessagesThread[np + 1]; peers = new String[np]; socketList = new Vector<Socket>(); int wport = 0; int rport = 0; int rank = 0; // np + 1 IOThreads for (int i = 0; i < (np + 1); i++) { try { sock = servSock.accept(); //start IO thread to read STDOUT and STDERR from wrappers IOMessagesThread io = new IOMessagesThread(sock); ioThreads[i] = io; ioThreads[i].start(); } catch (Exception e) { System.err.println("Error accepting connection from peer socket.."); e.printStackTrace(); } } // Loop to read port numbers from Wrapper.java processes // and to create WRAPPER_INFO (containing all IPs and ports) String WRAPPER_INFO = "#Peer Information"; for (int i = np; i > 0; i--) { try { sock = 
infoSock.accept(); DataOutputStream out = new DataOutputStream(sock.getOutputStream()); DataInputStream in = new DataInputStream(sock.getInputStream()); if (in.readUTF().startsWith("Sending Info")) { wport = in.readInt(); rport = in.readInt(); rank = in.readInt(); peers[rank] = ";" + sock.getInetAddress().getHostAddress() + "@" + rport + "@" + wport + "@" + rank; socketList.add(sock); } } catch (Exception e) { System.err.println("[MPJYarnClient.java]: Error accepting" + " connection from peer socket!"); e.printStackTrace(); } } for (int i = 0; i < np; i++) { WRAPPER_INFO += peers[i]; } // Loop to broadcast WRAPPER_INFO to all Wrappers for (int i = np; i > 0; i--) { try { sock = socketList.get(np - i); DataOutputStream out = new DataOutputStream(sock.getOutputStream()); out.writeUTF(WRAPPER_INFO); out.flush(); sock.close(); } catch (Exception e) { System.err.println("[MPJYarnClient.java]: Error closing" + " connection from peer socket.."); e.printStackTrace(); } } try { infoSock.close(); } catch (IOException exp) { exp.printStackTrace(); } // wait for all IO Threads to complete for (int i = 0; i < (np + 1); i++) { ioThreads[i].join(); } isRunning = true; System.out.println("\nApplication Statistics!"); while (true) { appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); fStatus = appReport.getFinalApplicationStatus(); if (appState == YarnApplicationState.FINISHED) { isRunning = false; if (fStatus == FinalApplicationStatus.SUCCEEDED) { System.out.println("State: " + fStatus); } else { System.out.println("State: " + fStatus); } break; } else if (appState == YarnApplicationState.KILLED) { isRunning = false; System.out.println("State: " + appState); break; } else if (appState == YarnApplicationState.FAILED) { isRunning = false; System.out.println("State: " + appState); break; } Thread.sleep(100); } try { if (debugYarn) { logger.info("Cleaning the files from hdfs: "); logger.info("1) " + dest.toString()); logger.info("2) " + 
wrapperDest.toString()); logger.info("3) " + userJarDest.toString()); } fs.delete(dest); fs.delete(wrapperDest); fs.delete(userJarDest); } catch (IOException exp) { exp.printStackTrace(); } System.out.println("Application ID: " + appId + "\n" + "Application User: " + appReport.getUser() + "\n" + "RM Queue: " + appReport.getQueue() + "\n" + "Start Time: " + appReport.getStartTime() + "\n" + "Finish Time: " + appReport.getFinishTime()); }
From source file:uk.ac.gla.terrier.probos.controller.ControllerServer.java
License:Open Source License
@Override public PBSNodeStatus[] getNodesStatus() throws Exception { //first use the container reports of all running jobs to get a picture of the hosts in use //for each job TIntObjectHashMap<List<ContainerId>> job2con = getAllActiveContainers(); final Map<String, TIntArrayList> node2job = new HashMap<String, TIntArrayList>(); job2con.forEachEntry(new TIntObjectProcedure<List<ContainerId>>() { @Override// w w w.j a v a2 s. com public boolean execute(int jobId, List<ContainerId> containerList) { for (ContainerId cid : containerList) { try { ContainerReport cr = yClient.getContainerReport(cid); String hostname = cr.getAssignedNode().getHost(); TIntArrayList jobs = node2job.get(hostname); if (jobs == null) node2job.put(hostname, jobs = new TIntArrayList()); jobs.add(jobId); } catch (Exception e) { throw new RuntimeException(e); } } return true; } }); List<NodeReport> nodeReports = yClient.getNodeReports(); PBSNodeStatus[] rtr = new PBSNodeStatus[nodeReports.size()]; for (int i = 0; i < rtr.length; i++) { final NodeReport node = nodeReports.get(i); String hostname = node.getNodeId().getHost(); String yarnState = node.getNodeState().toString(); String rack = node.getRackName(); String tracker = node.getHttpAddress(); int numContainers = node.getNumContainers(); int numProcs = node.getCapability().getVirtualCores(); TIntArrayList jobList = node2job.get(hostname); int[] jobs; if (jobList == null) jobs = new int[0]; else jobs = jobList.toArray(); String state = "free"; if (numContainers >= numProcs) state = "busy"; StringBuilder status = new StringBuilder(); status.append("capacity=" + node.getCapability().toString()); status.append(",used=" + node.getUsed().toString()); rtr[i] = new PBSNodeStatus(hostname, state, status.toString(), jobs, tracker, node.getHealthReport(), rack, yarnState, numProcs, node.getNodeLabels()); } return rtr; }