List of usage examples for org.apache.hadoop.yarn.api.records ContainerStatus getState
/**
 * Get the {@code ContainerState} of the container.
 *
 * @return the {@code ContainerState} of the container
 */
@Public @Stable public abstract ContainerState getState();
Returns: the ContainerState of the container.
From source file: org.apache.flink.yarn.YarnFlinkResourceManager.java
License:Apache License
/** * Invoked when the ResourceManager informs of completed containers. * Called via an actor message by the callback from the ResourceManager client. * //from w w w. j a v a2 s . c o m * @param containers The containers that have completed. */ private void containersComplete(List<ContainerStatus> containers) { // the list contains both failed containers, as well as containers that // were gracefully returned by this application master for (ContainerStatus status : containers) { final ResourceID id = new ResourceID(status.getContainerId().toString()); // check if this is a failed container or a completed container if (containersBeingReturned.remove(status.getContainerId()) != null) { // regular completed container that we released LOG.info("Container {} completed successfully with diagnostics: {}", id, status.getDiagnostics()); } else { // failed container, either at startup, or running final String exitStatus; switch (status.getExitStatus()) { case -103: exitStatus = "Vmem limit exceeded (-103)"; break; case -104: exitStatus = "Pmem limit exceeded (-104)"; break; default: exitStatus = String.valueOf(status.getExitStatus()); } final YarnContainerInLaunch launched = containersInLaunch.remove(id); if (launched != null) { LOG.info("Container {} failed, with a TaskManager in launch or registration. " + "Exit status: {}", id, exitStatus); // we will trigger re-acquiring new containers at the end } else { // failed registered worker LOG.info("Container {} failed. Exit status: {}", id, exitStatus); // notify the generic logic, which notifies the JobManager, etc. notifyWorkerFailed(id, "Container " + id + " failed. 
" + "Exit status: {}" + exitStatus); } // general failure logging failedContainersSoFar++; String diagMessage = String.format( "Diagnostics for container %s in state %s : " + "exitStatus=%s diagnostics=%s", id, status.getState(), exitStatus, status.getDiagnostics()); sendInfoMessage(diagMessage); LOG.info(diagMessage); LOG.info("Total number of failed containers so far: " + failedContainersSoFar); // maxFailedContainers == -1 is infinite number of retries. if (maxFailedContainers >= 0 && failedContainersSoFar > maxFailedContainers) { String msg = "Stopping YARN session because the number of failed containers (" + failedContainersSoFar + ") exceeded the maximum failed containers (" + maxFailedContainers + "). This number is controlled by the '" + ConfigConstants.YARN_MAX_FAILED_CONTAINERS + "' configuration setting. " + "By default its the number of requested containers."; LOG.error(msg); self().tell(decorateMessage(new StopCluster(ApplicationStatus.FAILED, msg)), ActorRef.noSender()); // no need to do anything else return; } } } updateProgress(); // in case failed containers were among the finished containers, make // sure we re-examine and request new ones triggerCheckWorkers(); }
From source file:org.apache.flink.yarn.YarnResourceManagerTest.java
License:Apache License
/**
 * Creates a Mockito stub of {@link ContainerStatus} for the given container id.
 * The stub reports state {@code COMPLETE}, diagnostics {@code "Test exit"} and
 * exit status {@code -1}.
 *
 * @param containerId the id the stub returns from {@code getContainerId()}
 * @return the stubbed container status
 */
private static ContainerStatus mockContainerStatus(ContainerId containerId) {
    final ContainerStatus stubbedStatus = mock(ContainerStatus.class);

    // Identity of the container.
    when(stubbedStatus.getContainerId()).thenReturn(containerId);

    // Completion details.
    when(stubbedStatus.getExitStatus()).thenReturn(-1);
    when(stubbedStatus.getDiagnostics()).thenReturn("Test exit");
    when(stubbedStatus.getState()).thenReturn(ContainerState.COMPLETE);

    return stubbedStatus;
}
From source file:org.apache.hama.bsp.BSPTaskLauncher.java
License:Apache License
/**
 * Polls the current container status from the container manager.
 *
 * <p>Every status in the response is logged. The first one whose container id
 * equals the allocated container's id is taken as the status of this task.
 *
 * @return a {@code BSPTaskStatus} built from this launcher's id and the
 *         container's exit status once the container state is
 *         {@code COMPLETE}; {@code null} if the container hasn't finished yet
 *         or no status for the allocated container was reported
 * @throws Exception if requesting the container statuses fails
 */
public BSPTaskStatus poll() throws Exception {
    ContainerStatus lastStatus = null;
    GetContainerStatusesResponse getContainerStatusesResponse = cm.getContainerStatuses(statusRequest);
    List<ContainerStatus> containerStatuses = getContainerStatusesResponse.getContainerStatuses();

    for (ContainerStatus containerStatus : containerStatuses) {
        LOG.info("Got container status for containerID=" + containerStatus.getContainerId()
            + ", state=" + containerStatus.getState()
            + ", exitStatus=" + containerStatus.getExitStatus()
            + ", diagnostics=" + containerStatus.getDiagnostics());

        if (containerStatus.getContainerId().equals(allocatedContainer.getId())) {
            lastStatus = containerStatus;
            break;
        }
    }

    // lastStatus stays null when the response holds no status for our container;
    // treat that like "not finished yet" instead of dereferencing null.
    if (lastStatus == null || lastStatus.getState() != ContainerState.COMPLETE) {
        return null;
    }

    LOG.info(this.id + " Last report comes with exitstatus of " + lastStatus.getExitStatus()
        + " and diagnose string of " + lastStatus.getDiagnostics());

    return new BSPTaskStatus(id, lastStatus.getExitStatus());
}
From source file:org.apache.helix.provisioning.yarn.RMCallbackHandler.java
License:Apache License
@Override public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { GenericApplicationMaster.LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); synchronized (_genericApplicationMaster.allocatedContainerSet) { _genericApplicationMaster.allocatedContainerSet.remove(containerStatus.getContainerId()); SettableFuture<ContainerStopResponse> stopResponseFuture = _genericApplicationMaster.containerStopMap .remove(containerStatus.getContainerId()); if (stopResponseFuture != null) { ContainerStopResponse value = new ContainerStopResponse(); stopResponseFuture.set(value); } else { SettableFuture<ContainerReleaseResponse> releaseResponseFuture = _genericApplicationMaster.containerReleaseMap .remove(containerStatus.getContainerId()); if (releaseResponseFuture != null) { ContainerReleaseResponse value = new ContainerReleaseResponse(); releaseResponseFuture.set(value); }//from ww w .ja va2 s. com } } // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully GenericApplicationMaster.LOG.info( "Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } } }
From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java
License:Apache License
@Override //AMRMClientAsync public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size()); for (ContainerStatus status : completedContainers) { ContainerId containerId = status.getContainerId(); LOG_YARN.info(/*from w w w .j a v a 2 s . com*/ "Container Completion for" + " containerID={}," + " state={}," + " exitStatus={}," + " diagnostics={}", containerId, status.getState(), status.getExitStatus(), status.getDiagnostics()); // non complete containers should not be here assert (status.getState() == ContainerState.COMPLETE); AppState.NodeCompletionResult result = appState.onCompletedNode(conf, status); if (result.containerFailed) { RoleInstance ri = result.roleInstance; log.error("Role instance {} failed ", ri); } } // ask for more containers if any failed // In the case of Hoya, we don't expect containers to complete since // Hoya is a long running application. Keep track of how many containers // are completing. If too many complete, abort the application // TODO: this needs to be better thought about (and maybe something to // better handle in Yarn for long running apps) try { reviewRequestAndReleaseNodes(); } catch (HoyaInternalStateException e) { log.warn("Exception while flexing nodes", e); } }
From source file:org.apache.ignite.yarn.ApplicationMaster.java
License:Apache License
/** {@inheritDoc} */ public synchronized void onContainersCompleted(List<ContainerStatus> statuses) { for (ContainerStatus status : statuses) { containers.remove(status.getContainerId()); log.log(Level.INFO, "Container completed. Container id: {0}. State: {1}.", new Object[] { status.getContainerId(), status.getState() }); }//w w w . jav a 2 s . com }
From source file:org.apache.metron.maas.service.callback.ContainerRequestListener.java
License:Apache License
@SuppressWarnings("unchecked") @Override// w w w .j a va 2 s. c o m public void onContainersCompleted(List<ContainerStatus> completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); removeContainer(containerStatus.getContainerId()); LOG.info("REMOVING CONTAINER " + containerStatus.getContainerId()); serviceDiscoverer.unregisterByContainer(containerStatus.getContainerId() + ""); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); // increment counters for completed/failed containers int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { // container failed if (ContainerExitStatus.ABORTED != exitStatus) { // shell script failed // counts as completed } else { // container was killed by framework, possibly preempted // we should re-try as the container was lost for some reason // we do not need to release the container as it would be done // by the RM } } else { // nothing to do // container completed successfully LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } if (timelineClient != null) { YarnUtils.INSTANCE.publishContainerEndEvent(timelineClient, containerStatus, domainId, appSubmitterUgi); } } }
From source file:org.apache.metron.maas.service.yarn.YarnUtils.java
License:Apache License
/**
 * Publishes a container-end event for the given container through the timeline client.
 *
 * <p>The timeline entity is keyed by the container id, typed as
 * {@code DS_CONTAINER}, assigned to the given domain and filtered by the short
 * user name. A failure to publish is logged and not rethrown.
 *
 * @param timelineClient client used to put the entity
 * @param container      status of the container that ended
 * @param domainId       timeline domain id set on the entity
 * @param ugi            user information; its short user name becomes the "user" primary filter
 */
public void publishContainerEndEvent(final TimelineClient timelineClient, ContainerStatus container,
        String domainId, UserGroupInformation ugi) {
    final TimelineEntity endEntity = new TimelineEntity();
    endEntity.setEntityId(container.getContainerId().toString());
    endEntity.setEntityType(ApplicationMaster.DSEntity.DS_CONTAINER.toString());
    endEntity.setDomainId(domainId);
    endEntity.addPrimaryFilter("user", ugi.getShortUserName());
    endEntity.addEvent(newContainerEndEvent(container));

    try {
        timelineClient.putEntities(endEntity);
    } catch (YarnException | IOException e) {
        LOG.error("Container end event could not be published for " + container.getContainerId().toString(), e);
    }
}

/** Builds the CONTAINER_END timeline event carrying the container's state name and exit status. */
private TimelineEvent newContainerEndEvent(ContainerStatus container) {
    final TimelineEvent endEvent = new TimelineEvent();
    endEvent.setTimestamp(System.currentTimeMillis());
    endEvent.setEventType(ContainerEvents.CONTAINER_END.toString());
    endEvent.addEventInfo("State", container.getState().name());
    endEvent.addEventInfo("Exit Status", container.getExitStatus());
    return endEvent;
}
From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java
License:Apache License
/**
 * Tells whether the container is in use, i.e. its state is {@code NEW} or {@code RUNNING}.
 *
 * @param status the container status to inspect
 * @return {@code true} if the state is {@code NEW} or {@code RUNNING}, {@code false} otherwise
 */
private boolean containerInUse(ContainerStatus status) {
    if (status.getState() == ContainerState.NEW) {
        return true;
    }
    return status.getState() == ContainerState.RUNNING;
}
From source file:org.apache.reef.runtime.yarn.driver.YarnContainerManager.java
License:Apache License
/** * Handles container status reports. Calls come from YARN. * * @param value containing the container status *//* w w w . j ava 2s . com*/ private void onContainerStatus(final ContainerStatus value) { final String containerId = value.getContainerId().toString(); final boolean hasContainer = this.containers.hasContainer(containerId); if (hasContainer) { LOG.log(Level.FINE, "Received container status: {0}", containerId); final ResourceStatusProto.Builder status = ResourceStatusProto.newBuilder().setIdentifier(containerId); switch (value.getState()) { case COMPLETE: LOG.log(Level.FINE, "Container completed: status {0}", value.getExitStatus()); switch (value.getExitStatus()) { case 0: status.setState(ReefServiceProtos.State.DONE); break; case 143: status.setState(ReefServiceProtos.State.KILLED); break; default: status.setState(ReefServiceProtos.State.FAILED); } status.setExitCode(value.getExitStatus()); // remove the completed container (can be either done/killed/failed) from book keeping this.containers.removeAndGet(containerId); logContainerRemoval(containerId); break; default: LOG.info("Container running"); status.setState(ReefServiceProtos.State.RUNNING); } if (value.getDiagnostics() != null) { LOG.log(Level.FINE, "Container diagnostics: {0}", value.getDiagnostics()); status.setDiagnostics(value.getDiagnostics()); } this.reefEventHandlers.onResourceStatus(status.build()); } }