List of usage examples for org.apache.hadoop.yarn.api.records NodeReport getUsed
@Public @Stable public abstract Resource getUsed();
Returns: the used Resource on the node.
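Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed sources) of how getUsed() is typically combined with getCapability() to estimate free capacity per node. It assumes a Hadoop 2.x-style YarnClient and the int-valued Resource.getMemory() accessor; newer Hadoop releases expose getMemorySize() instead, and getUsed() may return null on some versions, which the sketch guards against.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;

public class NodeUsageSketch {
  public static void main(String[] args) throws Exception {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new Configuration());
    yarnClient.start();
    try {
      // Only inspect nodes that are currently running.
      List<NodeReport> reports = yarnClient.getNodeReports(NodeState.RUNNING);
      for (NodeReport report : reports) {
        Resource capability = report.getCapability();
        Resource used = report.getUsed(); // may be null on some Hadoop versions
        int usedMemory = (used != null) ? used.getMemory() : 0;
        int usedVCores = (used != null) ? used.getVirtualCores() : 0;
        System.out.printf("%s: %d MB and %d vcores free%n",
            report.getNodeId(),
            capability.getMemory() - usedMemory,
            capability.getVirtualCores() - usedVCores);
      }
    } finally {
      yarnClient.stop();
    }
  }
}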
From source file:co.cask.cdap.master.startup.YarnCheck.java
License:Apache License
private void checkResources(List<NodeReport> nodeReports) {
  LOG.info("Checking that YARN has enough resources to run all system services.");
  int memoryCapacity = 0;
  int vcoresCapacity = 0;
  int memoryUsed = 0;
  int vcoresUsed = 0;
  int availableNodes = 0;
  for (NodeReport nodeReport : nodeReports) {
    NodeId nodeId = nodeReport.getNodeId();
    LOG.debug("Got report for node {}", nodeId);
    if (!nodeReport.getNodeState().isUnusable()) {
      Resource nodeCapability = nodeReport.getCapability();
      Resource nodeUsed = nodeReport.getUsed();
      // some versions of hadoop return null, others do not
      if (nodeCapability != null) {
        LOG.debug("node {} resource capability: memory = {}, vcores = {}", nodeId,
            nodeCapability.getMemory(), nodeCapability.getVirtualCores());
        memoryCapacity += nodeCapability.getMemory();
        vcoresCapacity += nodeCapability.getVirtualCores();
      }
      if (nodeUsed != null) {
        LOG.debug("node {} resources used: memory = {}, vcores = {}", nodeId,
            nodeUsed.getMemory(), nodeUsed.getVirtualCores());
        memoryUsed += nodeUsed.getMemory();
        vcoresUsed += nodeUsed.getVirtualCores();
      }
      availableNodes++;
    }
  }
  LOG.debug("YARN resource capacity: {} MB of memory and {} virtual cores.", memoryCapacity, vcoresCapacity);
  LOG.debug("YARN resources used: {} MB of memory and {} virtual cores.", memoryUsed, vcoresUsed);

  // calculate memory and vcores required by CDAP
  int requiredMemoryMB = 0;
  int requiredVCores = 0;
  Set<String> invalidKeys = new HashSet<>();
  for (ServiceResourceKeys serviceResourceKeys : systemServicesResourceKeys) {
    boolean hasConfigError = false;
    int instances = 0;
    int memoryMB = 0;
    int vcores = 0;
    try {
      instances = cConf.getInt(serviceResourceKeys.getInstancesKey());
    } catch (Exception e) {
      invalidKeys.add(serviceResourceKeys.getInstancesKey());
      hasConfigError = true;
    }
    try {
      memoryMB = cConf.getInt(serviceResourceKeys.getMemoryKey());
    } catch (Exception e) {
      invalidKeys.add(serviceResourceKeys.getMemoryKey());
      hasConfigError = true;
    }
    try {
      vcores = cConf.getInt(serviceResourceKeys.getVcoresKey());
    } catch (Exception e) {
      invalidKeys.add(serviceResourceKeys.getVcoresKey());
      hasConfigError = true;
    }
    if (!hasConfigError) {
      LOG.debug("Resource settings for system service {}: {}={}, {}={}, {}={}",
          serviceResourceKeys.getServiceName(),
          serviceResourceKeys.getInstancesKey(), instances,
          serviceResourceKeys.getMemoryKey(), memoryMB,
          serviceResourceKeys.getVcoresKey(), vcores);
      requiredMemoryMB += memoryMB * instances;
      requiredVCores += vcores * instances;
    }
  }
  if (!invalidKeys.isEmpty()) {
    throw new RuntimeException("YARN resources check failed to invalid config settings for keys: "
        + Joiner.on(',').join(invalidKeys));
  }
  LOG.debug("{} MB of memory and {} virtual cores are required.", requiredMemoryMB, requiredVCores);

  int availableMemoryMB = memoryCapacity - memoryUsed;
  int availableVCores = vcoresCapacity - vcoresUsed;
  boolean memoryOK = requiredMemoryMB <= availableMemoryMB;
  // if this is negative or zero just assume its not using vcores
  boolean vcoresOK = vcoresCapacity <= 0 || requiredVCores <= availableVCores;
  if (!memoryOK && !vcoresOK) {
    LOG.warn("Services require {} MB of memory and {} vcores, "
        + "but the cluster only has {} MB of memory and {} vcores available.",
        requiredMemoryMB, requiredVCores, availableMemoryMB, availableVCores);
  } else if (!memoryOK) {
    LOG.warn("Services require {} MB of memory but the cluster only has {} MB of memory available.",
        requiredMemoryMB, availableMemoryMB);
  } else if (!vcoresOK) {
    LOG.warn("Services require {} vcores but the cluster only has {} vcores available.",
        requiredVCores, availableVCores);
  } else {
    LOG.info(" YARN resources successfully verified.");
  }
}
From source file:co.cask.cdap.operations.yarn.YarnResources.java
License:Apache License
@Override
public synchronized void collect() throws Exception {
  reset();
  List<NodeReport> nodeReports;
  YarnClient yarnClient = createYARNClient();
  try {
    nodeReports = yarnClient.getNodeReports();
  } finally {
    yarnClient.stop();
  }
  for (NodeReport nodeReport : nodeReports) {
    NodeId nodeId = nodeReport.getNodeId();
    LOG.debug("Got report for node {}", nodeId);
    if (!nodeReport.getNodeState().isUnusable()) {
      Resource nodeCapability = nodeReport.getCapability();
      Resource nodeUsed = nodeReport.getUsed();
      // some versions of hadoop return null, others do not
      if (nodeCapability != null) {
        LOG.debug("node {} resource capability: memory = {}, vcores = {}", nodeId,
            nodeCapability.getMemory(), nodeCapability.getVirtualCores());
        totalMemory += nodeCapability.getMemory();
        totalVCores += nodeCapability.getVirtualCores();
      }
      if (nodeUsed != null) {
        LOG.debug("node {} resources used: memory = {}, vcores = {}", nodeId,
            nodeUsed.getMemory(), nodeUsed.getVirtualCores());
        usedMemory += nodeUsed.getMemory();
        usedVCores += nodeUsed.getVirtualCores();
      }
    }
  }
}
From source file:com.datatorrent.stram.LocalityTest.java
License:Apache License
@Test
public void testNodeLocal() {
  LogicalPlan dag = new LogicalPlan();
  dag.getAttributes().put(com.datatorrent.api.Context.DAGContext.APPLICATION_PATH,
      new File("target", LocalityTest.class.getName()).getAbsolutePath());
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);

  GenericTestOperator partitioned = dag.addOperator("partitioned", GenericTestOperator.class);
  dag.getMeta(partitioned).getAttributes().put(OperatorContext.PARTITIONER,
      new StatelessPartitioner<GenericTestOperator>(2));

  GenericTestOperator partitionedParallel = dag.addOperator("partitionedParallel", GenericTestOperator.class);

  dag.addStream("o1_outport1", o1.outport1, partitioned.inport1).setLocality(null);

  dag.addStream("partitioned_outport1", partitioned.outport1, partitionedParallel.inport2)
      .setLocality(Locality.NODE_LOCAL);
  dag.setInputPortAttribute(partitionedParallel.inport2, PortContext.PARTITION_PARALLEL, true);

  GenericTestOperator single = dag.addOperator("single", GenericTestOperator.class);
  dag.addStream("partitionedParallel_outport1", partitionedParallel.outport1, single.inport1);

  int maxContainers = 7;
  dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, maxContainers);

  StreamingContainerManager scm = new StreamingContainerManager(dag);
  Assert.assertEquals("number required containers", 7, scm.containerStartRequests.size());

  ResourceRequestHandler rr = new ResourceRequestHandler();

  int containerMem = 2000;
  Map<String, NodeReport> nodeReports = Maps.newHashMap();
  NodeReport nr = BuilderUtils.newNodeReport(BuilderUtils.newNodeId("host1", 0), NodeState.RUNNING,
      "httpAddress", "rackName", BuilderUtils.newResource(0, 0),
      BuilderUtils.newResource(containerMem * 2, 2), 0, null, 0);
  nodeReports.put(nr.getNodeId().getHost(), nr);
  nr = BuilderUtils.newNodeReport(BuilderUtils.newNodeId("host2", 0), NodeState.RUNNING,
      "httpAddress", "rackName", BuilderUtils.newResource(0, 0),
      BuilderUtils.newResource(containerMem * 2, 2), 0, null, 0);
  nodeReports.put(nr.getNodeId().getHost(), nr);

  // set resources
  rr.updateNodeReports(Lists.newArrayList(nodeReports.values()));

  Map<PTContainer, String> requestedHosts = Maps.newHashMap();
  for (ContainerStartRequest csr : scm.containerStartRequests) {
    String host = rr.getHost(csr, true);
    csr.container.host = host;
    // update the node report
    if (host != null) {
      requestedHosts.put(csr.container, host);
      nr = nodeReports.get(host);
      nr.getUsed().setMemory(nr.getUsed().getMemory() + containerMem);
    }
  }

  Assert.assertEquals("" + requestedHosts, nodeReports.keySet(), Sets.newHashSet(requestedHosts.values()));

  for (Map.Entry<PTContainer, String> e : requestedHosts.entrySet()) {
    for (PTOperator oper : e.getKey().getOperators()) {
      if (oper.getNodeLocalOperators().getOperatorSet().size() > 1) {
        String expHost = null;
        for (PTOperator nodeLocalOper : oper.getNodeLocalOperators().getOperatorSet()) {
          Assert.assertNotNull("host null " + nodeLocalOper.getContainer(), nodeLocalOper.getContainer().host);
          if (expHost == null) {
            expHost = nodeLocalOper.getContainer().host;
          } else {
            Assert.assertEquals("expected same host " + nodeLocalOper, expHost,
                nodeLocalOper.getContainer().host);
          }
        }
      }
    }
  }
}
From source file:com.datatorrent.stram.ResourceRequestHandler.java
License:Apache License
/**
 * Tracks update to available resources. Resource availability is used to make decisions about where to request new
 * containers.
 *
 * @param nodeReports
 */
public void updateNodeReports(List<NodeReport> nodeReports) {
  // LOG.debug("Got {} updated node reports.", nodeReports.size());
  for (NodeReport nr : nodeReports) {
    StringBuilder sb = new StringBuilder();
    sb.append("rackName=").append(nr.getRackName()).append(",nodeid=").append(nr.getNodeId())
        .append(",numContainers=").append(nr.getNumContainers()).append(",capability=")
        .append(nr.getCapability()).append("used=").append(nr.getUsed()).append("state=")
        .append(nr.getNodeState());
    LOG.info("Node report: " + sb);
    nodeReportMap.put(nr.getNodeId().getHost(), nr);
    nodeToRack.put(nr.getNodeId().getHost(), nr.getRackName());
  }
}
From source file:com.datatorrent.stram.ResourceRequestHandler.java
License:Apache License
public String getHost(ContainerStartRequest csr, boolean first) {
  String host = null;
  PTContainer c = csr.container;
  if (first) {
    for (PTOperator oper : c.getOperators()) {
      HostOperatorSet grpObj = oper.getNodeLocalOperators();
      host = nodeLocalMapping.get(grpObj.getOperatorSet());
      if (host != null) {
        return host;
      }
      if (grpObj.getHost() != null) {
        host = grpObj.getHost();
        // using the 1st host value as host for container
        break;
      }
    }
    if (host != null && nodeReportMap.get(host) != null) {
      for (PTOperator oper : c.getOperators()) {
        HostOperatorSet grpObj = oper.getNodeLocalOperators();
        Set<PTOperator> nodeLocalSet = grpObj.getOperatorSet();
        NodeReport report = nodeReportMap.get(host);
        int aggrMemory = c.getRequiredMemoryMB();
        int vCores = c.getRequiredVCores();
        Set<PTContainer> containers = Sets.newHashSet();
        containers.add(c);
        for (PTOperator nodeLocalOper : nodeLocalSet) {
          if (!containers.contains(nodeLocalOper.getContainer())) {
            aggrMemory += nodeLocalOper.getContainer().getRequiredMemoryMB();
            vCores += nodeLocalOper.getContainer().getRequiredVCores();
            containers.add(nodeLocalOper.getContainer());
          }
        }
        int memAvailable = report.getCapability().getMemory() - report.getUsed().getMemory();
        int vCoresAvailable = report.getCapability().getVirtualCores() - report.getUsed().getVirtualCores();
        if (memAvailable >= aggrMemory && vCoresAvailable >= vCores) {
          nodeLocalMapping.put(nodeLocalSet, host);
          return host;
        }
      }
    }
  }

  // the host requested didn't have the resources so looking for other hosts
  host = null;
  for (PTOperator oper : c.getOperators()) {
    HostOperatorSet grpObj = oper.getNodeLocalOperators();
    Set<PTOperator> nodeLocalSet = grpObj.getOperatorSet();
    if (nodeLocalSet.size() > 1) {
      LOG.debug("Finding new host for {}", nodeLocalSet);
      int aggrMemory = c.getRequiredMemoryMB();
      int vCores = c.getRequiredVCores();
      Set<PTContainer> containers = Sets.newHashSet();
      containers.add(c);
      // aggregate memory required for all containers
      for (PTOperator nodeLocalOper : nodeLocalSet) {
        if (!containers.contains(nodeLocalOper.getContainer())) {
          aggrMemory += nodeLocalOper.getContainer().getRequiredMemoryMB();
          vCores += nodeLocalOper.getContainer().getRequiredVCores();
          containers.add(nodeLocalOper.getContainer());
        }
      }
      for (Map.Entry<String, NodeReport> nodeEntry : nodeReportMap.entrySet()) {
        int memAvailable = nodeEntry.getValue().getCapability().getMemory()
            - nodeEntry.getValue().getUsed().getMemory();
        int vCoresAvailable = nodeEntry.getValue().getCapability().getVirtualCores()
            - nodeEntry.getValue().getUsed().getVirtualCores();
        if (memAvailable >= aggrMemory && vCoresAvailable >= vCores) {
          host = nodeEntry.getKey();
          grpObj.setHost(host);
          nodeLocalMapping.put(nodeLocalSet, host);
          return host;
        }
      }
    }
  }
  return host;
}
From source file:com.datatorrent.stram.StramMiniClusterTest.java
License:Apache License
@Test
public void testSetupShutdown() throws Exception {
  GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class);
  ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService();
  GetClusterNodesResponse response = clientRMService.getClusterNodes(request);
  List<NodeReport> nodeReports = response.getNodeReports();
  LOG.info("{}", nodeReports);

  for (NodeReport nr : nodeReports) {
    LOG.info("Node: {}", nr.getNodeId());
    LOG.info("Total memory: {}", nr.getCapability());
    LOG.info("Used memory: {}", nr.getUsed());
    LOG.info("Number containers: {}", nr.getNumContainers());
  }

  String appMasterJar = JarFinder.getJar(StreamingAppMaster.class);
  LOG.info("appmaster jar: " + appMasterJar);
  String testJar = JarFinder.getJar(StramMiniClusterTest.class);
  LOG.info("testJar: " + testJar);

  // create test application
  Properties dagProps = new Properties();

  // input module (ensure shutdown works while windows are generated)
  dagProps.put(StreamingApplication.DT_PREFIX + "operator.numGen.classname",
      TestGeneratorInputOperator.class.getName());
  dagProps.put(StreamingApplication.DT_PREFIX + "operator.numGen.maxTuples", "1");

  // fake output adapter - to be ignored when determine shutdown
  //props.put(DAGContext.DT_PREFIX + "stream.output.classname", HDFSOutputStream.class.getName());
  //props.put(DAGContext.DT_PREFIX + "stream.output.inputNode", "module2");
  //props.put(DAGContext.DT_PREFIX + "stream.output.filepath", "miniclustertest-testSetupShutdown.out");

  dagProps.put(StreamingApplication.DT_PREFIX + "operator.module1.classname",
      GenericTestOperator.class.getName());
  dagProps.put(StreamingApplication.DT_PREFIX + "operator.module2.classname",
      GenericTestOperator.class.getName());
  dagProps.put(StreamingApplication.DT_PREFIX + "stream.fromNumGen.source", "numGen.outport");
  dagProps.put(StreamingApplication.DT_PREFIX + "stream.fromNumGen.sinks", "module1.inport1");
  dagProps.put(StreamingApplication.DT_PREFIX + "stream.n1n2.source", "module1.outport1");
  dagProps.put(StreamingApplication.DT_PREFIX + "stream.n1n2.sinks", "module2.inport1");

  dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.MASTER_MEMORY_MB.getName(), "128");
  dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.CONTAINER_JVM_OPTIONS.getName(),
      "-Dlog4j.properties=custom_log4j.properties");
  dagProps.setProperty(StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.MEMORY_MB.getName(), "64");
  dagProps.setProperty(StreamingApplication.DT_PREFIX + "operator.*." + OperatorContext.VCORES.getName(), "1");
  dagProps.setProperty(StreamingApplication.DT_PREFIX + "operator.*.port.*."
      + Context.PortContext.BUFFER_MEMORY_MB.getName(), "32");
  dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.DEBUG.getName(), "true");
  //dagProps.setProperty(StreamingApplication.DT_PREFIX + LogicalPlan.CONTAINERS_MAX_COUNT.getName(), "2");
  LOG.info("dag properties: {}", dagProps);

  LOG.info("Initializing Client");
  LogicalPlanConfiguration tb = new LogicalPlanConfiguration(conf);
  tb.addFromProperties(dagProps, null);

  LogicalPlan dag = createDAG(tb);
  Configuration yarnConf = new Configuration(yarnCluster.getConfig());
  StramClient client = new StramClient(yarnConf, dag);
  try {
    client.start();
    if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) {
      client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster
    }
    LOG.info("Running client");
    client.startApplication();
    boolean result = client.monitorApplication();
    LOG.info("Client run completed. Result=" + result);
    Assert.assertTrue(result);
  } finally {
    client.stop();
  }
}
From source file:com.ibm.bi.dml.yarn.ropt.YarnClusterAnalyzer.java
License:Open Source License
/**
 * @return
 * @throws YarnException
 * @throws IOException
 */
public static double getClusterUtilization() throws IOException {
  double util = 0;

  try {
    if (_client == null)
      _client = createYarnClient();
    List<NodeReport> nodesReport = _client.getNodeReports();

    double maxMem = 0;
    double currMem = 0;
    long maxCores = 0;
    long currCores = 0;

    for (NodeReport node : nodesReport) {
      Resource max = node.getCapability();
      Resource used = node.getUsed();
      maxMem += max.getMemory();
      currMem += used.getMemory();
      maxCores += max.getVirtualCores();
      currCores += used.getVirtualCores();
    }

    util = Math.max(Math.min(1, currMem / maxMem), //memory util
        Math.min(1, (double) currCores / maxCores)); //vcore util
  } catch (Exception ex) {
    throw new IOException(ex);
  }

  return util;
}
From source file:com.ibm.bi.dml.yarn.ropt.YarnClusterAnalyzer.java
License:Open Source License
/**
 * Analyzes properties of Yarn cluster and Hadoop configurations.
 */
public static void analyzeYarnCluster(YarnClient yarnClient, YarnConfiguration conf, boolean verbose) {
  try {
    List<NodeReport> nodesReport = yarnClient.getNodeReports();
    if (verbose)
      System.out.println("There are " + nodesReport.size() + " nodes in the cluster");
    if (nodesReport.isEmpty())
      throw new YarnException("There are zero available nodes in the yarn cluster");

    nodesMaxPhySorted = new ArrayList<Long>(nodesReport.size());
    clusterTotalMem = 0;
    clusterTotalCores = 0;
    clusterTotalNodes = 0;
    minimumMRContainerPhyMB = -1;
    for (NodeReport node : nodesReport) {
      Resource resource = node.getCapability();
      Resource used = node.getUsed();
      if (used == null)
        used = Resource.newInstance(0, 0);
      int mb = resource.getMemory();
      int cores = resource.getVirtualCores();
      if (mb <= 0)
        throw new YarnException("A node has non-positive memory " + mb);

      int myMinMRPhyMB = mb / cores / CPU_HYPER_FACTOR;
      if (minimumMRContainerPhyMB < myMinMRPhyMB)
        minimumMRContainerPhyMB = myMinMRPhyMB; // minimumMRContainerPhyMB needs to be the largest among the mins

      clusterTotalMem += (long) mb * 1024 * 1024;
      nodesMaxPhySorted.add((long) mb * 1024 * 1024);
      clusterTotalCores += cores;
      clusterTotalNodes++;
      if (verbose)
        System.out.println("\t" + node.getNodeId() + " has " + mb + " MB (" + used.getMemory()
            + " MB used) memory and " + resource.getVirtualCores() + " ("
            + used.getVirtualCores() + " used) cores");
    }
    Collections.sort(nodesMaxPhySorted, Collections.reverseOrder());

    nodesMaxBudgetSorted = new ArrayList<Double>(nodesMaxPhySorted.size());
    for (int i = 0; i < nodesMaxPhySorted.size(); i++)
      nodesMaxBudgetSorted.add(ResourceOptimizer.phyToBudget(nodesMaxPhySorted.get(i)));

    _remotePar = nodesReport.size();
    if (_remotePar == 0)
      throw new YarnException("There are no available nodes in the yarn cluster");

    // Now get the default cluster settings
    _remoteMRSortMem = (1024 * 1024) * conf.getLong("io.sort.mb", 100); //100MB

    //handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
    //(for robustness we probe both: child and map configuration parameters)
    String javaOpts1 = conf.get("mapred.child.java.opts"); //internally mapred/mapreduce synonym
    String javaOpts2 = conf.get("mapreduce.map.java.opts", null); //internally mapred/mapreduce synonym
    String javaOpts3 = conf.get("mapreduce.reduce.java.opts", null); //internally mapred/mapreduce synonym
    if (javaOpts2 != null) //specific value overrides generic
      _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts2);
    else
      _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts1);
    if (javaOpts3 != null) //specific value overrides generic
      _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts3);
    else
      _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);

    //HDFS blocksize
    String blocksize = conf.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
    _blocksize = Long.parseLong(blocksize);

    minimalPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
    maximumPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
    mrAMPhy = (long) conf.getInt("yarn.app.mapreduce.am.resource.mb", 1536) * 1024 * 1024;

  } catch (Exception e) {
    throw new RuntimeException("Unable to analyze yarn cluster ", e);
  }

  /*
   * This is for AppMaster to query available resource in the cluster during heartbeat
   *
  AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
  rmClient.init(conf);
  rmClient.start();
  AllocateResponse response = rmClient.allocate(0);
  int nodeCount = response.getNumClusterNodes();
  Resource resource = response.getAvailableResources();
  List<NodeReport> nodeUpdate = response.getUpdatedNodes();
  LOG.info("This is a " + nodeCount + " node cluster with totally " + resource.getMemory() + " memory and "
      + resource.getVirtualCores() + " cores");
  LOG.info(nodereport.size() + " updatedNode reports received");
  for (NodeReport node : nodeUpdate) {
    resource = node.getCapability();
    LOG.info(node.getNodeId() + " updated with " + resource.getMemory() + " memory and "
        + resource.getVirtualCores() + " cores");
  }*/
}
From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java
License:Apache License
/**
 * Main run function for the application master
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public boolean run() throws YarnException, IOException {
  yarnClient.start();
  LOG.info("Starting ApplicationMaster");

  Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
  DataOutputBuffer dob = new DataOutputBuffer();
  credentials.writeTokenStorageToStream(dob);
  // Now remove the AM->RM token so that containers cannot access it.
  Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
  while (iter.hasNext()) {
    Token<?> token = iter.next();
    if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
      iter.remove();
    }
  }
  allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

  amRMClient = AMRMClient.createAMRMClient();
  amRMClient.init(conf);
  amRMClient.start();

  containerListener = createNMCallbackHandler();
  nmClientAsync = new NMClientAsyncImpl(containerListener);
  nmClientAsync.init(conf);
  nmClientAsync.start();

  // Register self with ResourceManager
  // This will start heartbeating to the RM
  appMasterHostname = NetUtils.getHostname();
  RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
      appMasterRpcPort, appMasterTrackingUrl);

  //TODO: Figure out how to do this.
  List<NodeReport> reports = this.yarnClient.getNodeReports();
  LOG.info("Cluster Status");
  List<Resource> availableResources = new ArrayList<Resource>();
  for (NodeReport nr : reports) {
    LOG.info(" NodeId: " + nr.getNodeId() + " Capabilities " + nr.getCapability() + " Used Resources "
        + nr.getUsed());
    int availableMem = nr.getCapability().getMemory() - nr.getUsed().getMemory();
    int availableVCores = nr.getCapability().getVirtualCores() - nr.getUsed().getVirtualCores();
    Resource resource = Resource.newInstance(availableMem, availableVCores);
    availableResources.add(resource);
  }

  /*Based on available resources scheduler should decide the best allocation of recommenders to resources
  and return a list of resources that should be requested from the ResourceManager*/
  DataSetDescription datasetDesc = new DataSetDescription(this.setup.dataMetadataFile);
  List<RecommenderPool> recommenderPools = RecommenderScheduler.splitRecommenderPool(availableResources,
      recommenders, datasetDesc, this.setup.nShards);

  for (RecommenderPool res : recommenderPools) {
    ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(), requestPriority);
    LOG.info("CONTAINER ASK: " + containerAsk);
    amRMClient.addContainerRequest(containerAsk);
  }

  float progress = 0;

  List<RecommenderPool> pendingPools = new ArrayList<RecommenderPool>();
  for (RecommenderPool p : recommenderPools)
    pendingPools.add(p);
  this.numTotalContainers = recommenderPools.size();
  this.numCompletedContainers.set(0);
  this.numAllocatedContainers.set(0);

  while (numCompletedContainers.get() != numTotalContainers) {
    try {
      Thread.sleep(200);
      AllocateResponse allocResp = amRMClient.allocate(progress);
      List<Container> newContainers = allocResp.getAllocatedContainers();
      List<ContainerStatus> completedContainers = allocResp.getCompletedContainersStatuses();

      if (this.numAllocatedContainers.get() >= this.numTotalContainers && pendingPools.size() != 0) {
        //Ask for new containers for pending pools
        LOG.warn("The number of allocated containers has exceeded number of total containers, but "
            + "the pending pool size is still not 0. Asking for new containers from RM");
        for (RecommenderPool res : pendingPools) {
          ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(), requestPriority);
          LOG.info("NEW CONTAINER ASK: " + containerAsk);
          amRMClient.addContainerRequest(containerAsk);
        }
      }

      if (newContainers.size() > 0) {
        LOG.info("Allocated " + newContainers.size() + " new containers");
        numAllocatedContainers.addAndGet(newContainers.size());
        for (Container container : newContainers) {
          //Find matching recommender pool from pendingRecommender pools.
          RecommenderPool pool = null;
          for (RecommenderPool p : pendingPools) {
            if (p.getTotalMemory() == container.getResource().getMemory()) {
              pool = p;
              break;
            }
          }
          if (pool == null) {
            LOG.warn("No Takers for Container " + container + " Releasing container");
            amRMClient.releaseAssignedContainer(container.getId());
          } else {
            startContainer(container, pool);
            //This pool has now got a container. Remove it from pending pools
            pendingPools.remove(pool);
          }
        }
      }

      onContainersCompleted(completedContainers);
    } catch (InterruptedException ex) {
    }
  }
  finish();
  return success;
}
From source file:eu.stratosphere.yarn.Client.java
License:Apache License
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
    throws YarnException, IOException {
  ClusterResourceDescription crd = new ClusterResourceDescription();
  crd.totalFreeMemory = 0;
  crd.containerLimit = 0;
  List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
  for (NodeReport rep : nodes) {
    int free = rep.getCapability().getMemory() - (rep.getUsed() != null ? rep.getUsed().getMemory() : 0);
    crd.totalFreeMemory += free;
    if (free > crd.containerLimit) {
      crd.containerLimit = free;
    }
  }
  return crd;
}