Example usage for org.apache.hadoop.yarn.api.records ContainerState COMPLETE

List of usage examples for org.apache.hadoop.yarn.api.records ContainerState COMPLETE

Introduction

On this page you can find example usage of org.apache.hadoop.yarn.api.records ContainerState COMPLETE.

Prototype

ContainerState COMPLETE

To view the source code for org.apache.hadoop.yarn.api.records ContainerState COMPLETE, click the Source link.

Document

Completed container
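
The snippets under Usage all follow the same pattern: a completed-container callback or status poll compares ContainerStatus.getState() against ContainerState.COMPLETE before acting on the exit status. Below is a minimal, self-contained sketch of that pattern; the CompletedContainerHandler class, its counters, and the method name are illustrative placeholders and do not come from any of the source files listed below.

import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;

public class CompletedContainerHandler {

    private final AtomicInteger numCompletedContainers = new AtomicInteger();
    private final AtomicInteger numFailedContainers = new AtomicInteger();

    // Typically called from an AMRMClientAsync.CallbackHandler#onContainersCompleted
    // implementation once the ResourceManager reports finished containers.
    public void handleCompleted(List<ContainerStatus> completedContainers) {
        for (ContainerStatus status : completedContainers) {
            // Only containers in the COMPLETE state carry a final exit status.
            if (status.getState() != ContainerState.COMPLETE) {
                continue;
            }
            if (status.getExitStatus() == 0) {
                numCompletedContainers.incrementAndGet();
            } else {
                numFailedContainers.incrementAndGet();
            }
        }
    }
}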

Usage

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java

License:Apache License

public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
    for (ContainerStatus containerStatus : completedContainers) {
        LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state="
                + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                + ", diagnostics=" + containerStatus.getDiagnostics());

        // non complete containers should not be here
        assert (containerStatus.getState() == ContainerState.COMPLETE);

        // increment counters for completed/failed containers
        int exitStatus = containerStatus.getExitStatus();
        if (0 != exitStatus) {
            // container failed
            if (ContainerExitStatus.ABORTED != exitStatus) {
                // shell script failed
                // counts as completed
                numCompletedContainers.incrementAndGet();
                numFailedContainers.incrementAndGet();
            } else {
                // container was killed by framework, possibly preempted
                // we should re-try as the container was lost for some reason
                //TODO: Add retry
                numCompletedContainers.incrementAndGet();
                numFailedContainers.incrementAndGet();

                // we do not need to release the container as it would be done
                // by the RM
            }
        } else {
            //nothing to do
            // container completed successfully
            numCompletedContainers.incrementAndGet();
            LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId());
        }
    }
}

From source file:io.hops.util.DBUtility.java

License:Apache License

public static RMNode processHopRMNodeCompsForScheduler(RMNodeComps hopRMNodeComps, RMContext rmContext)
        throws InvalidProtocolBufferException {
    org.apache.hadoop.yarn.api.records.NodeId nodeId;
    RMNode rmNode = null;
    if (hopRMNodeComps != null) {
        nodeId = ConverterUtils.toNodeId(hopRMNodeComps.getRMNodeId());
        rmNode = rmContext.getRMNodes().get(nodeId);

        // The first time we are receiving the RMNode, this will happen when the node registers
        if (rmNode == null) {
            // Retrieve heartbeat
            boolean nextHeartbeat = true;

            // Create Resource
            Resource resource = null;
            if (hopRMNodeComps.getHopResource() != null) {
                resource = Resource.newInstance(hopRMNodeComps.getHopResource().getMemory(),
                        hopRMNodeComps.getHopResource().getVirtualCores());
            } else {
                LOG.error("ResourceOption should not be null");
                resource = Resource.newInstance(0, 0);
            }
            /*rmNode = new RMNodeImplDist(nodeId, rmContext, hopRMNodeComps.getHopRMNode().getHostName(),
                    hopRMNodeComps.getHopRMNode().getCommandPort(),
                    hopRMNodeComps.getHopRMNode().getHttpPort(),
                    ResourceTrackerService.resolve(hopRMNodeComps.getHopRMNode().getHostName()),
                    resourceOption,
                    hopRMNodeComps.getHopRMNode().getNodemanagerVersion(),
                    hopRMNodeComps.getHopRMNode().getHealthReport(),
                    hopRMNodeComps.getHopRMNode().getLastHealthReportTime(),
                    nextHeartbeat);*/

            rmNode = new RMNodeImplDist(nodeId, rmContext, hopRMNodeComps.getHopRMNode().getHostName(),
                    hopRMNodeComps.getHopRMNode().getCommandPort(), hopRMNodeComps.getHopRMNode().getHttpPort(),
                    ResourceTrackerService.resolve(hopRMNodeComps.getHopRMNode().getHostName()), resource,
                    hopRMNodeComps.getHopRMNode().getNodemanagerVersion());

            // Force Java to put the host in cache
            NetUtils.createSocketAddrForHost(nodeId.getHost(), nodeId.getPort());
        }

        // Update the RMNode
        if (hopRMNodeComps.getHopRMNode() != null) {
            ((RMNodeImplDist) rmNode).setState(hopRMNodeComps.getHopRMNode().getCurrentState());
        }
        if (hopRMNodeComps.getHopUpdatedContainerInfo() != null) {
            List<io.hops.metadata.yarn.entity.UpdatedContainerInfo> hopUpdatedContainerInfoList = hopRMNodeComps
                    .getHopUpdatedContainerInfo();

            if (hopUpdatedContainerInfoList != null && !hopUpdatedContainerInfoList.isEmpty()) {
                ConcurrentLinkedQueue<org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo> updatedContainerInfoQueue = new ConcurrentLinkedQueue<>();

                Map<Integer, org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo> ucis = new HashMap<>();
                LOG.debug(hopRMNodeComps.getRMNodeId() + " getting ucis " + hopUpdatedContainerInfoList.size()
                        + " pending event " + hopRMNodeComps.getPendingEvent().getId().getEventId());

                for (io.hops.metadata.yarn.entity.UpdatedContainerInfo hopUCI : hopUpdatedContainerInfoList) {
                    if (!ucis.containsKey(hopUCI.getUpdatedContainerInfoId())) {
                        ucis.put(hopUCI.getUpdatedContainerInfoId(),
                                new org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo(
                                        new ArrayList<org.apache.hadoop.yarn.api.records.ContainerStatus>(),
                                        new ArrayList<org.apache.hadoop.yarn.api.records.ContainerStatus>(),
                                        hopUCI.getUpdatedContainerInfoId()));
                    }

                    ContainerId cid = ConverterUtils.toContainerId(hopUCI.getContainerId());
                    io.hops.metadata.yarn.entity.ContainerStatus hopContainerStatus = hopRMNodeComps
                            .getHopContainersStatusMap().get(hopUCI.getContainerId());

                    org.apache.hadoop.yarn.api.records.ContainerStatus conStatus = org.apache.hadoop.yarn.api.records.ContainerStatus
                            .newInstance(cid, ContainerState.valueOf(hopContainerStatus.getState()),
                                    hopContainerStatus.getDiagnostics(), hopContainerStatus.getExitstatus());

                    // Check ContainerStatus state to add it in the appropriate list
                    if (conStatus != null) {
                        LOG.debug("add uci for container " + conStatus.getContainerId() + " status "
                                + conStatus.getState());
                        if (conStatus.getState().equals(ContainerState.RUNNING)) {
                            ucis.get(hopUCI.getUpdatedContainerInfoId()).getNewlyLaunchedContainers()
                                    .add(conStatus);
                        } else if (conStatus.getState().equals(ContainerState.COMPLETE)) {
                            ucis.get(hopUCI.getUpdatedContainerInfoId()).getCompletedContainers()
                                    .add(conStatus);
                        }
                    }
                }

                for (org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo uci : ucis
                        .values()) {
                    updatedContainerInfoQueue.add(uci);
                }

                ((RMNodeImplDist) rmNode).setUpdatedContainerInfo(updatedContainerInfoQueue);
            } else {
                LOG.debug(hopRMNodeComps.getRMNodeId()
                        + " hopUpdatedContainerInfoList = null || hopUpdatedContainerInfoList.isEmpty() "
                        + hopRMNodeComps.getPendingEvent().getId().getEventId());
            }
        } else {
            LOG.debug(hopRMNodeComps.getRMNodeId() + " hopRMNodeFull.getHopUpdatedContainerInfo()=null "
                    + hopRMNodeComps.getPendingEvent().getId().getEventId());
        }
    }

    return rmNode;
}

From source file:org.apache.flink.yarn.YarnResourceManagerTest.java

License:Apache License

private static ContainerStatus mockContainerStatus(ContainerId containerId) {
    ContainerStatus mockContainerStatus = mock(ContainerStatus.class);

    when(mockContainerStatus.getContainerId()).thenReturn(containerId);
    when(mockContainerStatus.getState()).thenReturn(ContainerState.COMPLETE);
    when(mockContainerStatus.getDiagnostics()).thenReturn("Test exit");
    when(mockContainerStatus.getExitStatus()).thenReturn(-1);

    return mockContainerStatus;
}

From source file:org.apache.hama.bsp.BSPTaskLauncher.java

License:Apache License

/**
 * This polls the current container status from container manager. Null if the
 * container hasn't finished yet.
 * 
 * @return
 * @throws Exception
 */
public BSPTaskStatus poll() throws Exception {

    ContainerStatus lastStatus = null;
    GetContainerStatusesResponse getContainerStatusesResponse = cm.getContainerStatuses(statusRequest);
    List<ContainerStatus> containerStatuses = getContainerStatusesResponse.getContainerStatuses();
    for (ContainerStatus containerStatus : containerStatuses) {
        LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state="
                + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                + ", diagnostics=" + containerStatus.getDiagnostics());

        if (containerStatus.getContainerId().equals(allocatedContainer.getId())) {
            lastStatus = containerStatus;
            break;
        }
    }
    if (lastStatus.getState() != ContainerState.COMPLETE) {
        return null;
    }
    LOG.info(this.id + " Last report comes with exitstatus of " + lastStatus.getExitStatus()
            + " and diagnose string of " + lastStatus.getDiagnostics());

    return new BSPTaskStatus(id, lastStatus.getExitStatus());
}

From source file:org.apache.helix.provisioning.yarn.RMCallbackHandler.java

License:Apache License

@Override
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
    for (ContainerStatus containerStatus : completedContainers) {
        GenericApplicationMaster.LOG.info("Got container status for containerID="
                + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus="
                + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics());

        // non complete containers should not be here
        assert (containerStatus.getState() == ContainerState.COMPLETE);
        synchronized (_genericApplicationMaster.allocatedContainerSet) {
            _genericApplicationMaster.allocatedContainerSet.remove(containerStatus.getContainerId());
            SettableFuture<ContainerStopResponse> stopResponseFuture = _genericApplicationMaster.containerStopMap
                    .remove(containerStatus.getContainerId());
            if (stopResponseFuture != null) {
                ContainerStopResponse value = new ContainerStopResponse();
                stopResponseFuture.set(value);
            } else {
                SettableFuture<ContainerReleaseResponse> releaseResponseFuture = _genericApplicationMaster.containerReleaseMap
                        .remove(containerStatus.getContainerId());
                if (releaseResponseFuture != null) {
                    ContainerReleaseResponse value = new ContainerReleaseResponse();
                    releaseResponseFuture.set(value);
                }
            }
        }
        // increment counters for completed/failed containers
        int exitStatus = containerStatus.getExitStatus();
        if (0 != exitStatus) {
            // container failed
            if (ContainerExitStatus.ABORTED != exitStatus) {

            } else {
                // container was killed by framework, possibly preempted
                // we should re-try as the container was lost for some reason

                // we do not need to release the container as it would be done
                // by the RM
            }
        } else {
            // nothing to do
            // container completed successfully
            GenericApplicationMaster.LOG.info(
                    "Container completed successfully." + ", containerId=" + containerStatus.getContainerId());
        }
    }
}

From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java

License:Apache License

@Override //AMRMClientAsync
public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size());
    for (ContainerStatus status : completedContainers) {
        ContainerId containerId = status.getContainerId();
        LOG_YARN.info(
                "Container Completion for" + " containerID={}," + " state={}," + " exitStatus={},"
                        + " diagnostics={}",
                containerId, status.getState(), status.getExitStatus(), status.getDiagnostics());

        // non complete containers should not be here
        assert (status.getState() == ContainerState.COMPLETE);
        AppState.NodeCompletionResult result = appState.onCompletedNode(conf, status);
        if (result.containerFailed) {
            RoleInstance ri = result.roleInstance;
            log.error("Role instance {} failed ", ri);
        }
    }

    // ask for more containers if any failed
    // In the case of Hoya, we don't expect containers to complete since
    // Hoya is a long running application. Keep track of how many containers
    // are completing. If too many complete, abort the application
    // TODO: this needs to be better thought about (and maybe something to
    // better handle in Yarn for long running apps)

    try {
        reviewRequestAndReleaseNodes();
    } catch (HoyaInternalStateException e) {
        log.warn("Exception while flexing nodes", e);
    }
}

From source file:org.apache.metron.maas.service.callback.ContainerRequestListener.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
    for (ContainerStatus containerStatus : completedContainers) {
        LOG.info("Got container status for containerID=" + containerStatus.getContainerId() + ", state="
                + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                + ", diagnostics=" + containerStatus.getDiagnostics());
        removeContainer(containerStatus.getContainerId());
        LOG.info("REMOVING CONTAINER " + containerStatus.getContainerId());
        serviceDiscoverer.unregisterByContainer(containerStatus.getContainerId() + "");
        // non complete containers should not be here
        assert (containerStatus.getState() == ContainerState.COMPLETE);
        // increment counters for completed/failed containers
        int exitStatus = containerStatus.getExitStatus();
        if (0 != exitStatus) {
            // container failed
            if (ContainerExitStatus.ABORTED != exitStatus) {
                // shell script failed
                // counts as completed
            } else {
                // container was killed by framework, possibly preempted
                // we should re-try as the container was lost for some reason
                // we do not need to release the container as it would be done
                // by the RM
            }
        } else {
            // nothing to do
            // container completed successfully
            LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId());
        }
        if (timelineClient != null) {
            YarnUtils.INSTANCE.publishContainerEndEvent(timelineClient, containerStatus, domainId,
                    appSubmitterUgi);
        }
    }
}

From source file:org.apache.slider.server.appmaster.SliderAppMaster.java

License:Apache License

@Override //AMRMClientAsync
public synchronized void onContainersCompleted(List<ContainerStatus> completedContainers) {
    LOG_YARN.info("onContainersCompleted([{}]", completedContainers.size());
    for (ContainerStatus status : completedContainers) {
        ContainerId containerId = status.getContainerId();
        LOG_YARN.info(
                "Container Completion for" + " containerID={}," + " state={}," + " exitStatus={},"
                        + " diagnostics={}",
                containerId, status.getState(), status.getExitStatus(), status.getDiagnostics());

        // non complete containers should not be here
        assert (status.getState() == ContainerState.COMPLETE);
        AppState.NodeCompletionResult result = appState.onCompletedNode(status);
        if (result.containerFailed) {
            RoleInstance ri = result.roleInstance;
            log.error("Role instance {} failed ", ri);
        }

        //  known nodes trigger notifications
        if (!result.unknownNode) {
            getProviderService().notifyContainerCompleted(containerId);
            queue(new UnregisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS));
        }
    }

    reviewRequestAndReleaseNodes("onContainersCompleted");
}

From source file:org.springframework.yarn.am.cluster.AbstractManagedContainerClusterAppmasterTests.java

License:Apache License

protected void releaseContainer(Object appmaster, Container container) throws Exception {
    ContainerStatus containerStatus = MockUtils.getMockContainerStatus(container.getId(),
            ContainerState.COMPLETE, 0);
    TestUtils.callMethod("onContainerCompleted", appmaster, new Object[] { containerStatus },
            new Class<?>[] { ContainerStatus.class });
}