Example usage for org.apache.hadoop.yarn.api.records ContainerStatus getState

Usage examples for org.apache.hadoop.yarn.api.records ContainerStatus getState

Introduction

On this page you can find usage examples for org.apache.hadoop.yarn.api.records ContainerStatus getState.

Prototype

/**
 * Get the <code>ContainerState</code> of the container.
 *
 * @return the <code>ContainerState</code> of the container
 */
@Public
@Stable
public abstract ContainerState getState();

Source Link

Document

Get the ContainerState of the container.

Usage

From source file:org.apache.flink.yarn.YarnFlinkResourceManager.java

License:Apache License

/**
 * Invoked when the ResourceManager informs of completed containers.
 * Called via an actor message by the callback from the ResourceManager client.
 *
 * <p>Containers found in {@code containersBeingReturned} are treated as regular
 * completions; every other container is counted as a failure. Once the failure count
 * exceeds {@code maxFailedContainers} (a negative limit disables the check), a
 * {@code StopCluster} message is sent to this actor and processing ends immediately.
 *
 * @param containers The containers that have completed.
 */
private void containersComplete(List<ContainerStatus> containers) {
    // the list contains both failed containers, as well as containers that
    // were gracefully returned by this application master

    for (ContainerStatus status : containers) {
        final ResourceID id = new ResourceID(status.getContainerId().toString());

        // check if this is a failed container or a completed container
        if (containersBeingReturned.remove(status.getContainerId()) != null) {
            // regular completed container that we released
            LOG.info("Container {} completed successfully with diagnostics: {}", id, status.getDiagnostics());
        } else {
            // failed container, either at startup, or running
            final String exitStatus;
            switch (status.getExitStatus()) {
            case -103:
                exitStatus = "Vmem limit exceeded (-103)";
                break;
            case -104:
                exitStatus = "Pmem limit exceeded (-104)";
                break;
            default:
                exitStatus = String.valueOf(status.getExitStatus());
                break;
            }

            final YarnContainerInLaunch launched = containersInLaunch.remove(id);
            if (launched != null) {
                LOG.info("Container {} failed, with a TaskManager in launch or registration. "
                        + "Exit status: {}", id, exitStatus);
                // we will trigger re-acquiring new containers at the end
            } else {
                // failed registered worker
                LOG.info("Container {} failed. Exit status: {}", id, exitStatus);

                // notify the generic logic, which notifies the JobManager, etc.
                // The message is built by plain concatenation, so it must not contain
                // a logger-style "{}" placeholder.
                notifyWorkerFailed(id, "Container " + id + " failed. Exit status: " + exitStatus);
            }

            // general failure logging
            failedContainersSoFar++;

            String diagMessage = String.format(
                    "Diagnostics for container %s in state %s : " + "exitStatus=%s diagnostics=%s", id,
                    status.getState(), exitStatus, status.getDiagnostics());
            sendInfoMessage(diagMessage);

            LOG.info(diagMessage);
            LOG.info("Total number of failed containers so far: {}", failedContainersSoFar);

            // maxFailedContainers == -1 is infinite number of retries.
            if (maxFailedContainers >= 0 && failedContainersSoFar > maxFailedContainers) {
                String msg = "Stopping YARN session because the number of failed containers ("
                        + failedContainersSoFar + ") exceeded the maximum failed containers ("
                        + maxFailedContainers + "). This number is controlled by the '"
                        + ConfigConstants.YARN_MAX_FAILED_CONTAINERS + "' configuration setting. "
                        + "By default its the number of requested containers.";

                LOG.error(msg);
                self().tell(decorateMessage(new StopCluster(ApplicationStatus.FAILED, msg)),
                        ActorRef.noSender());

                // no need to do anything else
                return;
            }
        }
    }

    updateProgress();

    // in case failed containers were among the finished containers, make
    // sure we re-examine and request new ones
    triggerCheckWorkers();
}

From source file:org.apache.flink.yarn.YarnResourceManagerTest.java

License:Apache License

/**
 * Builds a mocked {@code ContainerStatus} for the given container id, stubbed to report
 * state {@code COMPLETE}, the diagnostics text "Test exit" and exit status -1.
 *
 * @param containerId the id the mocked status reports
 * @return the stubbed mock
 */
private static ContainerStatus mockContainerStatus(ContainerId containerId) {
    final ContainerStatus status = mock(ContainerStatus.class);

    // identity of the container
    when(status.getContainerId()).thenReturn(containerId);

    // outcome of the container
    when(status.getState()).thenReturn(ContainerState.COMPLETE);
    when(status.getExitStatus()).thenReturn(-1);
    when(status.getDiagnostics()).thenReturn("Test exit");

    return status;
}

From source file:org.apache.hama.bsp.BSPTaskLauncher.java

License:Apache License

/**
 * Polls the current container statuses from the container manager and looks for the
 * status of the allocated container.
 *
 * @return a {@code BSPTaskStatus} carrying the container's exit status once the
 *         container is in state {@code COMPLETE}; {@code null} if the container has not
 *         finished yet or no status was reported for it
 * @throws Exception declared by this method; presumably propagated from the container
 *         manager's status request (confirm against the container manager API)
 */
public BSPTaskStatus poll() throws Exception {

    ContainerStatus lastStatus = null;
    GetContainerStatusesResponse getContainerStatusesResponse = cm.getContainerStatuses(statusRequest);
    List<ContainerStatus> containerStatuses = getContainerStatusesResponse.getContainerStatuses();
    for (ContainerStatus containerStatus : containerStatuses) {
        LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state="
                + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                + ", diagnostics=" + containerStatus.getDiagnostics());

        if (containerStatus.getContainerId().equals(allocatedContainer.getId())) {
            lastStatus = containerStatus;
            break;
        }
    }

    // lastStatus stays null when the response contains no entry for the allocated
    // container; treat that like "not finished yet" instead of dereferencing null.
    if (lastStatus == null || lastStatus.getState() != ContainerState.COMPLETE) {
        return null;
    }
    LOG.info(this.id + " Last report comes with exitstatus of " + lastStatus.getExitStatus()
            + " and diagnose string of " + lastStatus.getDiagnostics());

    return new BSPTaskStatus(id, lastStatus.getExitStatus());
}

From source file:org.apache.helix.provisioning.yarn.RMCallbackHandler.java

License:Apache License

/**
 * Callback for completed containers: logs each status, drops the container from the
 * application master's allocated set, and completes a pending stop future or, if none
 * is registered, a pending release future for that container.
 *
 * @param completedContainers statuses of the containers reported as completed
 */
@Override
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
    for (ContainerStatus completed : completedContainers) {
        GenericApplicationMaster.LOG.info("Got container status for containerID="
                + completed.getContainerId() + ", state=" + completed.getState() + ", exitStatus="
                + completed.getExitStatus() + ", diagnostics=" + completed.getDiagnostics());

        // only completed containers are expected in this callback
        assert (completed.getState() == ContainerState.COMPLETE);

        synchronized (_genericApplicationMaster.allocatedContainerSet) {
            _genericApplicationMaster.allocatedContainerSet.remove(completed.getContainerId());

            SettableFuture<ContainerStopResponse> pendingStop = _genericApplicationMaster.containerStopMap
                    .remove(completed.getContainerId());
            if (pendingStop == null) {
                // no stop was pending; check whether a release is waiting instead
                SettableFuture<ContainerReleaseResponse> pendingRelease = _genericApplicationMaster.containerReleaseMap
                        .remove(completed.getContainerId());
                if (pendingRelease != null) {
                    pendingRelease.set(new ContainerReleaseResponse());
                }
            } else {
                pendingStop.set(new ContainerStopResponse());
            }
        }

        // Non-zero exit statuses need no action here: an ABORTED container was killed
        // by the framework (possibly preempted) and is released by the RM; any other
        // non-zero status is a plain container failure.
        int exitStatus = completed.getExitStatus();
        if (exitStatus == 0) {
            // container completed successfully
            GenericApplicationMaster.LOG.info(
                    "Container completed successfully." + ", containerId=" + completed.getContainerId());
        }
    }
}

From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java

License:Apache License

/**
 * Callback for completed containers: logs every completion, hands each status to the
 * application state, logs failed role instances, and finally reviews the outstanding
 * requests and releases.
 *
 * @param completedContainers statuses of the containers reported as completed
 */
@Override //AMRMClientAsync
public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size());
    for (ContainerStatus completed : completedContainers) {
        ContainerId completedId = completed.getContainerId();
        LOG_YARN.info(
                "Container Completion for containerID={}, state={}, exitStatus={}, diagnostics={}",
                completedId, completed.getState(), completed.getExitStatus(), completed.getDiagnostics());

        // only completed containers are expected in this callback
        assert (completed.getState() == ContainerState.COMPLETE);

        AppState.NodeCompletionResult outcome = appState.onCompletedNode(conf, completed);
        if (outcome.containerFailed) {
            log.error("Role instance {} failed ", outcome.roleInstance);
        }
    }

    // ask for more containers if any failed
    // In the case of Hoya, we don't expect containers to complete since
    // Hoya is a long running application. Keep track of how many containers
    // are completing. If too many complete, abort the application
    // TODO: this needs to be better thought about (and maybe something to
    // better handle in Yarn for long running apps)
    try {
        reviewRequestAndReleaseNodes();
    } catch (HoyaInternalStateException e) {
        log.warn("Exception while flexing nodes", e);
    }
}

From source file:org.apache.ignite.yarn.ApplicationMaster.java

License:Apache License

/** {@inheritDoc} */
public synchronized void onContainersCompleted(List<ContainerStatus> statuses) {
    for (ContainerStatus completed : statuses) {
        // forget the container first, then report its id and final state
        containers.remove(completed.getContainerId());

        final Object[] logArgs = { completed.getContainerId(), completed.getState() };
        log.log(Level.INFO, "Container completed. Container id: {0}. State: {1}.", logArgs);
    }
}

From source file:org.apache.metron.maas.service.callback.ContainerRequestListener.java

License:Apache License

/**
 * Callback for completed containers: logs each status, removes the container from local
 * bookkeeping and from service discovery, logs successful completions, and publishes a
 * container-end event when a timeline client is configured.
 *
 * @param completedContainers statuses of the containers reported as completed
 */
@SuppressWarnings("unchecked")
@Override
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
    for (ContainerStatus completed : completedContainers) {
        LOG.info("Got container status for containerID=" + completed.getContainerId() + ", state="
                + completed.getState() + ", exitStatus=" + completed.getExitStatus()
                + ", diagnostics=" + completed.getDiagnostics());

        // drop the container from local bookkeeping and from service discovery
        removeContainer(completed.getContainerId());
        LOG.info("REMOVING CONTAINER " + completed.getContainerId());
        serviceDiscoverer.unregisterByContainer(completed.getContainerId() + "");

        // only completed containers are expected in this callback
        assert (completed.getState() == ContainerState.COMPLETE);

        // Non-zero exit statuses need no action here: an ABORTED container was killed
        // by the framework (possibly preempted) and is released by the RM; any other
        // non-zero status means the shell script failed and still counts as completed.
        int exitStatus = completed.getExitStatus();
        if (exitStatus == 0) {
            // container completed successfully
            LOG.info("Container completed successfully." + ", containerId=" + completed.getContainerId());
        }

        if (timelineClient == null) {
            continue;
        }
        YarnUtils.INSTANCE.publishContainerEndEvent(timelineClient, completed, domainId,
                appSubmitterUgi);
    }
}

From source file:org.apache.metron.maas.service.yarn.YarnUtils.java

License:Apache License

/**
 * Publishes a container-end event for the given container through the timeline client.
 * The entity is keyed by the container id and filtered by the short user name; the event
 * records the container's state name and exit status. Publishing failures are logged and
 * not rethrown.
 *
 * @param timelineClient client used to put the entity
 * @param container status of the container that ended
 * @param domainId domain id set on the entity
 * @param ugi user information supplying the "user" primary filter
 */
public void publishContainerEndEvent(final TimelineClient timelineClient, ContainerStatus container,
        String domainId, UserGroupInformation ugi) {
    // describe the container this event belongs to
    final TimelineEntity containerEntity = new TimelineEntity();
    containerEntity.setEntityId(container.getContainerId().toString());
    containerEntity.setEntityType(ApplicationMaster.DSEntity.DS_CONTAINER.toString());
    containerEntity.setDomainId(domainId);
    containerEntity.addPrimaryFilter("user", ugi.getShortUserName());

    // describe how the container ended
    TimelineEvent endEvent = new TimelineEvent();
    endEvent.setTimestamp(System.currentTimeMillis());
    endEvent.setEventType(ContainerEvents.CONTAINER_END.toString());
    endEvent.addEventInfo("State", container.getState().name());
    endEvent.addEventInfo("Exit Status", container.getExitStatus());
    containerEntity.addEvent(endEvent);

    try {
        timelineClient.putEntities(containerEntity);
    } catch (YarnException | IOException e) {
        final String failure = "Container end event could not be published for "
                + container.getContainerId().toString();
        LOG.error(failure, e);
    }
}

From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java

License:Apache License

/**
 * Tells whether the container is in state {@code NEW} or {@code RUNNING}.
 *
 * @param status the container status to inspect
 * @return {@code true} for state {@code NEW} or {@code RUNNING}, {@code false} otherwise
 */
private boolean containerInUse(ContainerStatus status) {
    final ContainerState state = status.getState();
    if (state == ContainerState.NEW) {
        return true;
    }
    return state == ContainerState.RUNNING;
}

From source file:org.apache.reef.runtime.yarn.driver.YarnContainerManager.java

License:Apache License

/**
 * Handles container status reports. Calls come from YARN.
 *
 * <p>Reports for containers that are not tracked are ignored. For tracked containers a
 * resource status is built and passed to the REEF event handlers; a completed container
 * is also removed from the bookkeeping.
 *
 * @param value containing the container status
 */
private void onContainerStatus(final ContainerStatus value) {

    final String containerId = value.getContainerId().toString();
    if (!this.containers.hasContainer(containerId)) {
        // not one of our containers: nothing to report
        return;
    }

    LOG.log(Level.FINE, "Received container status: {0}", containerId);

    final ResourceStatusProto.Builder statusBuilder = ResourceStatusProto.newBuilder().setIdentifier(containerId);

    switch (value.getState()) {
    case COMPLETE: {
        final int exitCode = value.getExitStatus();
        LOG.log(Level.FINE, "Container completed: status {0}", exitCode);

        // exit code 0 maps to DONE, 143 to KILLED, anything else to FAILED
        final ReefServiceProtos.State finalState;
        if (exitCode == 0) {
            finalState = ReefServiceProtos.State.DONE;
        } else if (exitCode == 143) {
            finalState = ReefServiceProtos.State.KILLED;
        } else {
            finalState = ReefServiceProtos.State.FAILED;
        }
        statusBuilder.setState(finalState);
        statusBuilder.setExitCode(exitCode);

        // remove the completed container (can be either done/killed/failed) from book keeping
        this.containers.removeAndGet(containerId);
        logContainerRemoval(containerId);
        break;
    }
    default:
        LOG.info("Container running");
        statusBuilder.setState(ReefServiceProtos.State.RUNNING);
        break;
    }

    final String diagnostics = value.getDiagnostics();
    if (diagnostics != null) {
        LOG.log(Level.FINE, "Container diagnostics: {0}", diagnostics);
        statusBuilder.setDiagnostics(diagnostics);
    }

    this.reefEventHandlers.onResourceStatus(statusBuilder.build());
}