Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED.

Prototype

FinalApplicationStatus SUCCEEDED

To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED.

Click Source Link

Document

Application which finished successfully.

Usage

From source file:com.continuuity.weave.internal.yarn.Hadoop21YarnAMClient.java

License:Apache License

@Override
protected void shutDown() throws Exception {
    nmClient.stopAndWait();//ww w  .  j a va 2 s .  c o m
    amrmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, trackerUrl.toString());
    amrmClient.stop();
}

From source file:com.continuuity.weave.yarn.ApplicationMasterService.java

License:Open Source License

private void doStop() throws Exception {
    Thread.interrupted(); // This is just to clear the interrupt flag

    LOG.info("Stop application master with spec: " + WeaveSpecificationAdapter.create().toJson(weaveSpec));

    Set<ContainerId> ids = Sets.newHashSet(runningContainers.getContainerIds());

    runningContainers.stopAll();//from w  w  w  .j  a v  a 2 s  . c om

    int count = 0;
    while (!ids.isEmpty() && count++ < 5) {
        AllocateResponse allocateResponse = amrmClient.allocate(0.0f);
        for (ContainerStatus status : allocateResponse.getAMResponse().getCompletedContainersStatuses()) {
            ids.remove(status.getContainerId());
        }
        TimeUnit.SECONDS.sleep(1);
    }

    amrmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    amrmClient.stop();
}

From source file:com.datatorrent.stram.InlineAM.java

License:Apache License

public boolean run() throws Exception {
    LOG.info("Starting Client");

    // Connect to ResourceManager
    rmClient.start();//ww  w.  j av a 2s . c o  m
    try {
        // Get a new application id
        YarnClientApplication newApp = rmClient.createApplication();
        ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();

        // Create launch context for app master
        LOG.info("Setting up application submission context for ASM");
        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

        // set the application id
        appContext.setApplicationId(appId);
        // set the application name
        appContext.setApplicationName(appName);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(amPriority);
        appContext.setPriority(pri);

        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(amQueue);

        // Set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        appContext.setAMContainerSpec(amContainer);

        // unmanaged AM
        appContext.setUnmanagedAM(true);
        LOG.info("Setting unmanaged AM");

        // Submit the application to the applications manager
        LOG.info("Submitting application to ASM");
        rmClient.submitApplication(appContext);

        // Monitor the application to wait for launch state
        ApplicationReport appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.ACCEPTED));
        ApplicationAttemptId attemptId = appReport.getCurrentApplicationAttemptId();
        LOG.info("Launching application with id: " + attemptId);

        // launch AM
        runAM(attemptId);

        // Monitor the application for end state
        appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.KILLED,
                YarnApplicationState.FAILED, YarnApplicationState.FINISHED));
        YarnApplicationState appState = appReport.getYarnApplicationState();
        FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus();

        LOG.info("App ended with state: " + appReport.getYarnApplicationState() + " and status: " + appStatus);

        boolean success;
        if (YarnApplicationState.FINISHED == appState && FinalApplicationStatus.SUCCEEDED == appStatus) {
            LOG.info("Application has completed successfully.");
            success = true;
        } else {
            LOG.info("Application did finished unsuccessfully." + " YarnState=" + appState.toString()
                    + ", FinalStatus=" + appStatus.toString());
            success = false;
        }

        return success;
    } finally {
        rmClient.stop();
    }
}

From source file:com.datatorrent.stram.StramMiniClusterTest.java

License:Apache License

@Ignore
@Test//from w  ww .  j  av a 2  s.c  om
public void testUnmanagedAM() throws Exception {

    new InlineAM(conf) {
        @Override
        @SuppressWarnings("SleepWhileInLoop")
        public void runAM(ApplicationAttemptId attemptId) throws Exception {
            LOG.debug("AM running {}", attemptId);

            //AMRMClient amRmClient = new AMRMClientImpl(attemptId);
            //amRmClient.init(conf);
            //amRmClient.start();

            YarnClientHelper yarnClient = new YarnClientHelper(conf);
            ApplicationMasterProtocol resourceManager = yarnClient.connectToRM();

            // register with the RM (LAUNCHED -> RUNNING)
            RegisterApplicationMasterRequest appMasterRequest = Records
                    .newRecord(RegisterApplicationMasterRequest.class);
            resourceManager.registerApplicationMaster(appMasterRequest);

            // AM specific logic

            /*
            int containerCount = 1;
            Resource capability = Records.newRecord(Resource.class);
            capability.setMemory(1500);
                    
            Priority priority = Records.newRecord(Priority.class);
            priority.setPriority(10);
                    
            String[] hosts = {"vm1"};
            String[] racks = {"somerack"};
                    
            AMRMClient.ContainerRequest req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority, containerCount);
            amRmClient.addContainerRequest(req);
                    
            for (int i=0; i<100; i++) {
              AllocateResponse ar = amRmClient.allocate(0);
              Thread.sleep(1000);
              LOG.debug("allocateResponse: {}" , ar);
            }
            */

            int responseId = 0;
            AllocateRequest req = Records.newRecord(AllocateRequest.class);
            req.setResponseId(responseId++);

            List<ResourceRequest> lr = Lists.newArrayList();
            lr.add(setupContainerAskForRM("hdev-vm", 1, 128, 10));
            lr.add(setupContainerAskForRM("/default-rack", 1, 128, 10));
            lr.add(setupContainerAskForRM("*", 1, 128, 10));

            req.setAskList(lr);

            LOG.info("Requesting: " + req.getAskList());
            resourceManager.allocate(req);

            for (int i = 0; i < 100; i++) {
                req = Records.newRecord(AllocateRequest.class);
                req.setResponseId(responseId++);

                AllocateResponse ar = resourceManager.allocate(req);
                sleep(1000);
                LOG.debug("allocateResponse: {}", ar);
            }

            // unregister from RM
            FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);
            finishReq.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
            finishReq.setDiagnostics("testUnmanagedAM finished");
            resourceManager.finishApplicationMaster(finishReq);

        }

        private ResourceRequest setupContainerAskForRM(String resourceName, int numContainers,
                int containerMemory, int priority) {
            ResourceRequest request = Records.newRecord(ResourceRequest.class);

            // setup requirements for hosts
            // whether a particular rack/host is needed
            // Refer to apis under org.apache.hadoop.net for more
            // details on how to get figure out rack/host mapping.
            // using * as any host will do for the distributed shell app
            request.setResourceName(resourceName);

            // set no. of containers needed
            request.setNumContainers(numContainers);

            // set the priority for the request
            Priority pri = Records.newRecord(Priority.class);
            pri.setPriority(priority);
            request.setPriority(pri);

            // Set up resource type requirements
            // For now, only memory is supported so we set memory requirements
            Resource capability = Records.newRecord(Resource.class);
            capability.setMemory(containerMemory);
            request.setCapability(capability);

            return request;
        }

    }.run();

}

From source file:com.datatorrent.stram.StramMiniClusterTest.java

License:Apache License

@Ignore
@Test// w  w w  .j  ava2s .  co m
public void testUnmanagedAM2() throws Exception {

    new InlineAM(conf) {

        @Override
        @SuppressWarnings("SleepWhileInLoop")
        public void runAM(ApplicationAttemptId attemptId) throws Exception {
            LOG.debug("AM running {}", attemptId);
            AMRMClient<ContainerRequest> amRmClient = AMRMClient.createAMRMClient();
            amRmClient.init(conf);
            amRmClient.start();

            // register with the RM (LAUNCHED -> RUNNING)
            amRmClient.registerApplicationMaster("", 0, null);

            // AM specific logic

            String[] hosts = { "vm1" };
            String[] racks = { "somerack" };

            Resource capability = Records.newRecord(Resource.class);
            capability.setMemory(1000);

            Priority priority = Records.newRecord(Priority.class);
            priority.setPriority(10);
            AMRMClient.ContainerRequest req = new AMRMClient.ContainerRequest(capability, hosts, racks,
                    priority);
            amRmClient.addContainerRequest(req);
            amRmClient.addContainerRequest(req);

            /*
                    capability = Records.newRecord(Resource.class);
                    capability.setMemory(5512);
                    priority = Records.newRecord(Priority.class);
                    priority.setPriority(11);
                    req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority, 3);
                    amRmClient.addContainerRequest(req);
            */
            for (int i = 0; i < 100; i++) {
                AllocateResponse ar = amRmClient.allocate(0);
                sleep(1000);
                LOG.debug("allocateResponse: {}", ar);
                for (Container c : ar.getAllocatedContainers()) {
                    LOG.debug("*** allocated {}", c.getResource());
                    amRmClient.removeContainerRequest(req);
                }
                /*
                GetClusterNodesRequest request =
                    Records.newRecord(GetClusterNodesRequest.class);
                ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService();
                GetClusterNodesResponse response = clientRMService.getClusterNodes(request);
                List<NodeReport> nodeReports = response.getNodeReports();
                LOG.info(nodeReports);
                        
                for (NodeReport nr: nodeReports) {
                  LOG.info("Node: " + nr.getNodeId());
                  LOG.info("Total memory: " + nr.getCapability());
                  LOG.info("Used memory: " + nr.getUsed());
                  LOG.info("Number containers: " + nr.getNumContainers());
                }
                */
            }

            // unregister from RM
            amRmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "testUnmanagedAM finished",
                    null);

        }

    }.run();

}

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*from  w  ww. j a  v a 2  s . co m*/
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

private void finishApplication(FinalApplicationStatus finalStatus, int numTotalContainers)
        throws YarnException, IOException {
    LOG.info("Application completed. Signalling finish to RM");
    FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);
    finishReq.setFinalApplicationStatus(finalStatus);

    if (finalStatus != FinalApplicationStatus.SUCCEEDED) {
        String diagnostics = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + allocatedContainers.size() + ", failed="
                + numFailedContainers.get();
        if (!StringUtils.isEmpty(dnmgr.shutdownDiagnosticsMessage)) {
            diagnostics += "\n";
            diagnostics += dnmgr.shutdownDiagnosticsMessage;
        }//  w  w w . j  av a 2  s  .co  m
        // YARN-208 - as of 2.0.1-alpha dropped by the RM
        finishReq.setDiagnostics(diagnostics);
        // expected termination of the master process
        // application status and diagnostics message are set above
    }
    LOG.info("diagnostics: " + finishReq.getDiagnostics());
    amRmClient.unregisterApplicationMaster(finishReq.getFinalApplicationStatus(), finishReq.getDiagnostics(),
            null);
}

From source file:com.epam.hadoop.nv.yarn.Client.java

License:Apache License

/**
 * Monitor the submitted application for completion. Kill application if
 * time expires.//  ww w. j a  v  a 2  s  . c om
 * 
 * @param appId
 *            Application Id of application to be monitored
 * @return true if application completed successfully
 * @throws YarnException
 * @throws IOException
 */
private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {

    while (true) {

        // Check app status every 1 second.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }

        // Get application report for the appId we are interested in
        ApplicationReport report = yarnClient.getApplicationReport(appId);

        LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken="
                + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics()
                + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue()
                + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime()
                + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState="
                + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl()
                + ", appUser=" + report.getUser());

        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString()
                        + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus="
                    + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }

        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }

}

From source file:com.flyhz.avengers.framework.AvengersClient.java

License:Apache License

/**
 * Monitor the submitted application for completion. Kill application if
 * time expires./*from ww w  . ja v  a2s.  c  o  m*/
 * 
 * @param appId
 *            Application Id of application to be monitored
 * @return true if application completed successfully
 * @throws YarnException
 * @throws IOException
 */
private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {

    while (true) {

        // Check app status every 10 second.
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }

        // Get application report for the appId we are interested in
        ApplicationReport report = yarnClient.getApplicationReport(appId);

        LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken="
                + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics()
                + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue()
                + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime()
                + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState="
                + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl()
                + ", appUser=" + report.getUser());

        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString()
                        + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus="
                    + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }

        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }

}

From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java

License:Apache License

@VisibleForTesting
protected boolean finish() {
    // wait for completion.
    while (!done && (numCompletedContainers.get() != numTotalContainers)) {
        try {//www. ja  v  a2s  .c  o  m
            Thread.sleep(200);
        } catch (InterruptedException ex) {
        }
    }

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            LOG.info("Exception thrown in thread join: " + e.getMessage());
            e.printStackTrace();
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    boolean success = true;
    if (numCompletedContainers.get() - numFailedContainers.get() >= numTotalContainers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        LOG.info(appMessage);
        success = false;
    }
    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }

    amRMClient.stop();

    return success;
}