List of usage examples for org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE
ContainerState COMPLETE
To view the source code for org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, click Source Link.
From source file:com.datatorrent.stram.StreamingAppMasterService.java
License:Apache License
/**
 * Main run function for the application master.
 *
 * <p>Registers with the ResourceManager through {@code amRmClient}, finishes with
 * {@code FinalApplicationStatus.FAILED} when
 * {@code StramClientUtils.getStartedAppInstanceByName} reports an already started application,
 * and otherwise loops until {@code appDone} is set. Each iteration refreshes tokens when they
 * are due, runs queued {@code pendingTasks}, sleeps, asks the RM for containers, launches newly
 * allocated containers and processes completed ones. {@code finishApplication} is called with
 * the final status once the loop exits.
 *
 * @throws YarnException declared by this method; presumably propagated from the RM calls -
 *         confirm against the callees
 * @throws IOException declared by this method; presumably propagated from credential/RM calls -
 *         confirm against the callees
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    // Refresh interval: the anticipatory factor applied to the smaller of the HDFS and RM token life times.
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    // NOTE(review): the message says "expiry token time" but the value logged is tokenLifeTime, not expiryTime.
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back:
    // maps each outstanding start request to (loop iteration of the last ask, the ask itself)
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    // container ids to hand back to the RM with the next ask; cleared after each ask
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    // each start request gets its own, monotonically increasing priority (used below to match allocations)
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        // A non-null report means an application with the same name/user is already started: give up.
        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(),
                dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        // NOTE(review): any exception from the block above, including the duplicate-application lookup,
        // is reported with this "cluster nodes report" message.
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        // Token refresh only happens with security enabled, a key tab configured and the expiry time reached.
        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp", applicationId,
                    conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        // Drain and run all tasks queued in pendingTasks on this thread.
        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is only logged; the thread's interrupt flag is not restored.
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                // Cap the requested memory and vcores at the cluster maximum reported at registration.
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                // remember in which loop iteration the ask was issued
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        // Re-issue asks that have been outstanding for more than NUMBER_MISSED_HEARTBEATS iterations:
        // the old ask is scheduled for removal and a new one is created with the second argument false.
        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        // NOTE(review): re-issued asks are counted again in both totals below.
        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            // NOTE(review): every arm throws the same exception, so any non-null AM command aborts the loop.
            switch (amResp.getAMCommand()) {
                case AM_RESYNC:
                case AM_SHUTDOWN:
                    throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
                default:
                    throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            }
        }
        // the release list was sent with the ask above; start collecting afresh
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            // NOTE(review): "containerResourceMemory" and "priority" have no '=' before their values.
            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            // Match the allocation to an outstanding request by priority; no match means the
            // request with that priority is no longer outstanding.
            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                // undo this container's share of the subtraction made before the loop
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                // tokens stay null unless security is enabled
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    // kept on the holder so it can be cancelled when the container completes (see below)
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            // null here means this AM has no record of the container (e.g. it was released without being launched)
            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                // only containers this AM had recorded count as failed
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                // if (exitStatus == 1) {
                // // non-recoverable StreamingContainer failure
                // appDone = true;
                // finalStatus = FinalApplicationStatus.FAILED;
                // dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                // LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                // }
                // else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                // }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        // Exit conditions: a forced shutdown fails the application; otherwise the application succeeds
        // once nothing is allocated, nothing is requested and no start requests are queued.
        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}
From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java
License:Apache License
public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { // shell script failed // counts as completed numCompletedContainers.incrementAndGet(); numFailedContainers.incrementAndGet(); } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason //TODO: Add retry numCompletedContainers.incrementAndGet(); numFailedContainers.incrementAndGet(); // we do not need to release the container as it would be done // by the RM }// w w w.j a v a 2s.c o m } else { //nothing to do // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } } }
From source file:io.hops.util.DBUtility.java
License:Apache License
public static RMNode processHopRMNodeCompsForScheduler(RMNodeComps hopRMNodeComps, RMContext rmContext) throws InvalidProtocolBufferException { org.apache.hadoop.yarn.api.records.NodeId nodeId; RMNode rmNode = null;/*from w w w .java2 s .co m*/ if (hopRMNodeComps != null) { nodeId = ConverterUtils.toNodeId(hopRMNodeComps.getRMNodeId()); rmNode = rmContext.getRMNodes().get(nodeId); // The first time we are receiving the RMNode, this will happen when the node registers if (rmNode == null) { // Retrieve heartbeat boolean nextHeartbeat = true; // Create Resource Resource resource = null; if (hopRMNodeComps.getHopResource() != null) { resource = Resource.newInstance(hopRMNodeComps.getHopResource().getMemory(), hopRMNodeComps.getHopResource().getVirtualCores()); } else { LOG.error("ResourceOption should not be null"); resource = Resource.newInstance(0, 0); } /*rmNode = new RMNodeImplDist(nodeId, rmContext, hopRMNodeComps.getHopRMNode().getHostName(), hopRMNodeComps.getHopRMNode().getCommandPort(), hopRMNodeComps.getHopRMNode().getHttpPort(), ResourceTrackerService.resolve(hopRMNodeComps.getHopRMNode().getHostName()), resourceOption, hopRMNodeComps.getHopRMNode().getNodemanagerVersion(), hopRMNodeComps.getHopRMNode().getHealthReport(), hopRMNodeComps.getHopRMNode().getLastHealthReportTime(), nextHeartbeat);*/ rmNode = new RMNodeImplDist(nodeId, rmContext, hopRMNodeComps.getHopRMNode().getHostName(), hopRMNodeComps.getHopRMNode().getCommandPort(), hopRMNodeComps.getHopRMNode().getHttpPort(), ResourceTrackerService.resolve(hopRMNodeComps.getHopRMNode().getHostName()), resource, hopRMNodeComps.getHopRMNode().getNodemanagerVersion()); // Force Java to put the host in cache NetUtils.createSocketAddrForHost(nodeId.getHost(), nodeId.getPort()); } // Update the RMNode if (hopRMNodeComps.getHopRMNode() != null) { ((RMNodeImplDist) rmNode).setState(hopRMNodeComps.getHopRMNode().getCurrentState()); } if (hopRMNodeComps.getHopUpdatedContainerInfo() != null) { 
List<io.hops.metadata.yarn.entity.UpdatedContainerInfo> hopUpdatedContainerInfoList = hopRMNodeComps .getHopUpdatedContainerInfo(); if (hopUpdatedContainerInfoList != null && !hopUpdatedContainerInfoList.isEmpty()) { ConcurrentLinkedQueue<org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo> updatedContainerInfoQueue = new ConcurrentLinkedQueue<>(); Map<Integer, org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo> ucis = new HashMap<>(); LOG.debug(hopRMNodeComps.getRMNodeId() + " getting ucis " + hopUpdatedContainerInfoList.size() + " pending event " + hopRMNodeComps.getPendingEvent().getId().getEventId()); for (io.hops.metadata.yarn.entity.UpdatedContainerInfo hopUCI : hopUpdatedContainerInfoList) { if (!ucis.containsKey(hopUCI.getUpdatedContainerInfoId())) { ucis.put(hopUCI.getUpdatedContainerInfoId(), new org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo( new ArrayList<org.apache.hadoop.yarn.api.records.ContainerStatus>(), new ArrayList<org.apache.hadoop.yarn.api.records.ContainerStatus>(), hopUCI.getUpdatedContainerInfoId())); } ContainerId cid = ConverterUtils.toContainerId(hopUCI.getContainerId()); io.hops.metadata.yarn.entity.ContainerStatus hopContainerStatus = hopRMNodeComps .getHopContainersStatusMap().get(hopUCI.getContainerId()); org.apache.hadoop.yarn.api.records.ContainerStatus conStatus = org.apache.hadoop.yarn.api.records.ContainerStatus .newInstance(cid, ContainerState.valueOf(hopContainerStatus.getState()), hopContainerStatus.getDiagnostics(), hopContainerStatus.getExitstatus()); // Check ContainerStatus state to add it in the appropriate list if (conStatus != null) { LOG.debug("add uci for container " + conStatus.getContainerId() + " status " + conStatus.getState()); if (conStatus.getState().equals(ContainerState.RUNNING)) { ucis.get(hopUCI.getUpdatedContainerInfoId()).getNewlyLaunchedContainers() .add(conStatus); } else if (conStatus.getState().equals(ContainerState.COMPLETE)) { 
ucis.get(hopUCI.getUpdatedContainerInfoId()).getCompletedContainers() .add(conStatus); } } } for (org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo uci : ucis .values()) { updatedContainerInfoQueue.add(uci); } ((RMNodeImplDist) rmNode).setUpdatedContainerInfo(updatedContainerInfoQueue); } else { LOG.debug(hopRMNodeComps.getRMNodeId() + " hopUpdatedContainerInfoList = null || hopUpdatedContainerInfoList.isEmpty() " + hopRMNodeComps.getPendingEvent().getId().getEventId()); } } else { LOG.debug(hopRMNodeComps.getRMNodeId() + " hopRMNodeFull.getHopUpdatedContainerInfo()=null " + hopRMNodeComps.getPendingEvent().getId().getEventId()); } } return rmNode; }
From source file:org.apache.flink.yarn.YarnResourceManagerTest.java
License:Apache License
/**
 * Creates a mocked {@code ContainerStatus} in state {@code COMPLETE} with exit status -1 and
 * the diagnostics text "Test exit".
 *
 * @param containerId id the mock reports from {@code getContainerId()}
 * @return the stubbed mock
 */
private static ContainerStatus mockContainerStatus(ContainerId containerId) {
    final ContainerStatus completed = mock(ContainerStatus.class);

    when(completed.getContainerId()).thenReturn(containerId);
    when(completed.getState()).thenReturn(ContainerState.COMPLETE);
    when(completed.getDiagnostics()).thenReturn("Test exit");
    when(completed.getExitStatus()).thenReturn(-1);

    return completed;
}
From source file:org.apache.hama.bsp.BSPTaskLauncher.java
License:Apache License
/** * This polls the current container status from container manager. Null if the * container hasn't finished yet./* w ww .j av a2s . c o m*/ * * @return * @throws Exception */ public BSPTaskStatus poll() throws Exception { ContainerStatus lastStatus = null; GetContainerStatusesResponse getContainerStatusesResponse = cm.getContainerStatuses(statusRequest); List<ContainerStatus> containerStatuses = getContainerStatusesResponse.getContainerStatuses(); for (ContainerStatus containerStatus : containerStatuses) { LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); if (containerStatus.getContainerId().equals(allocatedContainer.getId())) { lastStatus = containerStatus; break; } } if (lastStatus.getState() != ContainerState.COMPLETE) { return null; } LOG.info(this.id + " Last report comes with exitstatus of " + lastStatus.getExitStatus() + " and diagnose string of " + lastStatus.getDiagnostics()); return new BSPTaskStatus(id, lastStatus.getExitStatus()); }
From source file:org.apache.helix.provisioning.yarn.RMCallbackHandler.java
License:Apache License
@Override public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { GenericApplicationMaster.LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); synchronized (_genericApplicationMaster.allocatedContainerSet) { _genericApplicationMaster.allocatedContainerSet.remove(containerStatus.getContainerId()); SettableFuture<ContainerStopResponse> stopResponseFuture = _genericApplicationMaster.containerStopMap .remove(containerStatus.getContainerId()); if (stopResponseFuture != null) { ContainerStopResponse value = new ContainerStopResponse(); stopResponseFuture.set(value); } else { SettableFuture<ContainerReleaseResponse> releaseResponseFuture = _genericApplicationMaster.containerReleaseMap .remove(containerStatus.getContainerId()); if (releaseResponseFuture != null) { ContainerReleaseResponse value = new ContainerReleaseResponse(); releaseResponseFuture.set(value); }/*w w w.j a v a 2 s. c o m*/ } } // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully GenericApplicationMaster.LOG.info( "Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } } }
From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java
License:Apache License
@Override //AMRMClientAsync public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size()); for (ContainerStatus status : completedContainers) { ContainerId containerId = status.getContainerId(); LOG_YARN.info(//from w w w .j a v a 2 s .c o m "Container Completion for" + " containerID={}," + " state={}," + " exitStatus={}," + " diagnostics={}", containerId, status.getState(), status.getExitStatus(), status.getDiagnostics()); // non complete containers should not be here assert (status.getState() == ContainerState.COMPLETE); AppState.NodeCompletionResult result = appState.onCompletedNode(conf, status); if (result.containerFailed) { RoleInstance ri = result.roleInstance; log.error("Role instance {} failed ", ri); } } // ask for more containers if any failed // In the case of Hoya, we don't expect containers to complete since // Hoya is a long running application. Keep track of how many containers // are completing. If too many complete, abort the application // TODO: this needs to be better thought about (and maybe something to // better handle in Yarn for long running apps) try { reviewRequestAndReleaseNodes(); } catch (HoyaInternalStateException e) { log.warn("Exception while flexing nodes", e); } }
From source file:org.apache.metron.maas.service.callback.ContainerRequestListener.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w w w .jav a2s . c o m*/ public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); removeContainer(containerStatus.getContainerId()); LOG.info("REMOVING CONTAINER " + containerStatus.getContainerId()); serviceDiscoverer.unregisterByContainer(containerStatus.getContainerId() + ""); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { // shell script failed // counts as completed } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } if (timelineClient != null) { YarnUtils.INSTANCE.publishContainerEndEvent(timelineClient, containerStatus, domainId, appSubmitterUgi); } } }
From source file:org.apache.slider.server.appmaster.SliderAppMaster.java
License:Apache License
@Override //AMRMClientAsync public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size()); for (ContainerStatus status : completedContainers) { ContainerId containerId = status.getContainerId(); LOG_YARN.info(/* www. j a v a2 s .co m*/ "Container Completion for" + " containerID={}," + " state={}," + " exitStatus={}," + " diagnostics={}", containerId, status.getState(), status.getExitStatus(), status.getDiagnostics()); // non complete containers should not be here assert (status.getState() == ContainerState.COMPLETE); AppState.NodeCompletionResult result = appState.onCompletedNode(status); if (result.containerFailed) { RoleInstance ri = result.roleInstance; log.error("Role instance {} failed ", ri); } // known nodes trigger notifications if (!result.unknownNode) { getProviderService().notifyContainerCompleted(containerId); queue(new UnregisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS)); } } reviewRequestAndReleaseNodes("onContainersCompleted"); }
From source file:org.springframework.yarn.am.cluster.AbstractManagedContainerClusterAppmasterTests.java
License:Apache License
/**
 * Simulates the completion of a container by invoking the appmaster's
 * {@code onContainerCompleted} method via {@code TestUtils.callMethod} with a mocked
 * {@code COMPLETE} status whose exit status is 0.
 *
 * @param appmaster the appmaster instance the method is invoked on
 * @param container the container whose completion is simulated
 * @throws Exception declared by this method; presumably propagated from the call made through
 *         {@code TestUtils} - confirm against the helper
 */
protected void releaseContainer(Object appmaster, Container container) throws Exception {
    final Object[] arguments = new Object[] {
            MockUtils.getMockContainerStatus(container.getId(), ContainerState.COMPLETE, 0) };
    final Class<?>[] argumentTypes = new Class<?>[] { ContainerStatus.class };

    TestUtils.callMethod("onContainerCompleted", appmaster, arguments, argumentTypes);
}