Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED.

Prototype

FinalApplicationStatus FAILED

To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED, click the Source Link.

Document

Application which failed.

Usage

From source file:com.cloudera.llama.am.yarn.YarnRMConnector.java

License:Apache License

/**
 * Handles an error reported by the Yarn client: logs it, notifies the Llama callback that
 * the RM stopped us, and stops with {@link FinalApplicationStatus#FAILED}.
 *
 * @param ex the error reported by the Yarn client
 */
@Override
public void onError(final Throwable ex) {
    final String cause = ex.toString();
    LOG.error("Error in Yarn client: {}", cause, ex);
    llamaCallback.stoppedByRM();
    // no need to use a ugi.doAs() as this is called from within Yarn client
    final String diagnostics = "Error in Yarn client: " + cause;
    _stop(FinalApplicationStatus.FAILED, diagnostics, true);
}

From source file:com.datatorrent.stram.StramMiniClusterTest.java

License:Apache License

/**
 * Launches an application containing a single {@code FailingOperator} (limited to one
 * recovery attempt) and checks that the run is reported as failed, both by the client
 * monitor and by the final status in the application report.
 */
@Test
public void testOperatorFailureRecovery() throws Exception {

    LogicalPlan plan = new LogicalPlan();
    String applicationPath = "target/" + this.getClass().getName();
    plan.setAttribute(com.datatorrent.api.Context.DAGContext.APPLICATION_PATH, applicationPath);
    FailingOperator failingOperator = plan.addOperator("badOperator", FailingOperator.class);
    plan.getContextAttributes(failingOperator).put(OperatorContext.RECOVERY_ATTEMPTS, 1);

    LOG.info("Initializing Client");
    StramClient stramClient = new StramClient(conf, plan);
    String javaHome = System.getenv("JAVA_HOME");
    if (StringUtils.isBlank(javaHome)) {
        // JAVA_HOME not set in the yarn mini cluster
        stramClient.javaCmd = "java";
    }
    try {
        stramClient.start();
        stramClient.startApplication();
        stramClient.setClientTimeout(120000);

        boolean succeeded = stramClient.monitorApplication();

        LOG.info("Client run completed. Result=" + succeeded);
        Assert.assertFalse("should fail", succeeded);

        ApplicationReport report = stramClient.getApplicationReport();
        FinalApplicationStatus reportedStatus = report.getFinalApplicationStatus();
        Assert.assertEquals("should fail", FinalApplicationStatus.FAILED, reportedStatus);
        // unable to get the diagnostics message set by the AM here - see YARN-208
        // diagnostics message does not make it here even with Hadoop 2.2 (but works on standalone cluster)
        //Assert.assertTrue("appReport " + ar, ar.getDiagnostics().contains("badOperator"));
    } finally {
        stramClient.stop();
    }
}

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master.
 *
 * <p>Registers with the ResourceManager, fails fast if another started application with the
 * same name and user is found, and then loops until {@code appDone} is set. Each iteration
 * refreshes tokens when due, runs queued tasks, sleeps one second, sends container
 * requests/releases to the RM, launches newly allocated containers and processes completed
 * ones. When the loop ends, {@code finishApplication} is called with the final status.
 *
 * @throws YarnException declared by this method; presumably raised by the RM client calls - TODO confirm
 * @throws IOException declared by this method; presumably raised by credential/RM calls - TODO confirm
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    // refresh interval: anticipatory factor times the shorter of the HDFS and RM token life times
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    // note: the value logged here is the computed interval, not the absolute expiry time
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    // maps each outstanding start request to (loop iteration it was last requested in, the request sent)
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    // container ids to hand back to the RM with the next request; cleared after each send
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    // incremented for every new request; used further down to match allocations back to requests
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    // short-lived client: only used inside the try block below and stopped in its finally
    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            // a started application with the same name and user was found: finish this one as FAILED
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        // any failure in the block above (including the duplicate check) is rethrown unchecked
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        // refresh tokens once the deadline has passed (only with security enabled and a keytab configured)
        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        // drain queued tasks, running each one inline on this thread
        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            // interruption only shortens the pause; the loop carries on
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                // clamp the requested memory and vcores to the cluster maximum reported at registration
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        // requests outstanding for more than NUMBER_MISSED_HEARTBEATS iterations are withdrawn and
        // re-created with the second createContainerRequest argument set to false
        // (presumably this relaxes locality, per the comment on requestedResources - TODO confirm)
        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        // note: re-created requests from the block above are counted here as well
        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        // any AM command from the RM aborts this method: every branch throws the same exception
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            // match the allocation to an outstanding request by priority;
            // if none matches, no request is waiting for this container
            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                // compensates the decrement applied above for this surplus allocation
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                // tokens stay null unless security is enabled
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    // kept on the holder so it can be cancelled when the container completes
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            // null when the container was never recorded in allocatedContainers
            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                // only containers that were tracked count as failed
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        // exit conditions: a forced shutdown ends the loop as FAILED; otherwise the loop ends as
        // SUCCEEDED once nothing is allocated, requested or queued for start
        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Waits for the containers to complete, joins the launch threads, stops the NM client and
 * unregisters this application master from the RM.
 *
 * @return {@code true} when the application was reported as SUCCEEDED, {@code false} when
 *         it was reported as FAILED
 */
@VisibleForTesting
protected boolean finish() {
    // Poll every 200 ms until flagged done or every container has completed.
    while (!(done || numCompletedContainers.get() == numTotalContainers)) {
        try {
            Thread.sleep(200);
        } catch (InterruptedException ignored) {
            // interruption is ignored here; the loop condition is simply checked again
        }
    }

    // Give every launch thread up to 10 seconds to finish; this matters when we
    // time out and still have to release containers.
    for (Thread launcher : launchThreads) {
        try {
            launcher.join(10000);
        } catch (InterruptedException e) {
            LOG.info("Exception thrown in thread join: " + e.getMessage());
            e.printStackTrace();
        }
    }

    // Stop all running containers now that the application is complete.
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // Tell the RM that the application has finished.
    LOG.info("Application completed. Signalling finish to RM");

    // Success means: completed minus failed containers reaches the requested total.
    final boolean success = numCompletedContainers.get() - numFailedContainers.get() >= numTotalContainers;
    final FinalApplicationStatus appStatus;
    String appMessage = null;
    if (success) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        LOG.info(appMessage);
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    amRMClient.stop();

    return success;
}

From source file:com.hazelcast.yarn.ApplicationMaster.java

License:Open Source License

/**
 * Registers with the ResourceManager and then, until the NM client reaches the STOPPED
 * state, keeps requesting worker containers whenever fewer than the configured cluster size
 * are tracked in {@code containers} and {@code checkResources()} allows it.
 *
 * <p>If the loop is interrupted the application master unregisters as KILLED; on any other
 * exception it unregisters as FAILED. In both cases the event is logged <em>before</em> the
 * unregister call, so the cause is not lost should unregistering itself throw.
 *
 * @throws IOException if registering or unregistering with the RM fails with an I/O error
 * @throws YarnException if registering or unregistering with the RM fails
 * @throws InterruptedException declared for callers; an interrupt raised inside the loop is
 *         handled here and not rethrown
 */
private void runApplicationMaster() throws IOException, YarnException, InterruptedException {
    this.rmClient.registerApplicationMaster("", 0, "");

    LOG.log(Level.INFO, "Application master running...");
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    try {
        while (!this.nmClient.isInState(Service.STATE.STOPPED)) {
            int running = this.containers.size();

            if (running < this.properties.clusterSize() && checkResources()) {
                // Resource requirements for worker containers.
                Resource capability = Records.newRecord(Resource.class);
                capability.setMemory(this.properties.memoryPerNode());
                capability.setVirtualCores(this.properties.cpuPerNode());

                // One request per missing container.
                for (int i = 0; i < this.properties.clusterSize() - running; i++) {
                    // Make container requests to ResourceManager
                    AMRMClient.ContainerRequest containerAsk = new AMRMClient.ContainerRequest(capability, null,
                            null, priority);

                    this.rmClient.addContainerRequest(containerAsk);
                    LOG.log(Level.INFO, "Making request. Memory: {0}, cpu {1}.",
                            new Object[] { this.properties.memoryPerNode(), this.properties.cpuPerNode() });
                }
            }

            TimeUnit.MILLISECONDS.sleep(TIMEOUT);
        }
    } catch (InterruptedException e) {
        // Log first: unregisterApplicationMaster may throw, which would otherwise hide the event.
        // NOTE(review): the interrupt status is not restored here, as in the original code.
        LOG.log(Level.WARNING, "Application master has been interrupted.");
        this.rmClient.unregisterApplicationMaster(FinalApplicationStatus.KILLED, "", "");
    } catch (Exception e) {
        // Log first so the root cause is recorded even if unregistering fails as well.
        LOG.log(Level.SEVERE, "Application master caused error.", e);
        this.rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, "", "");
    }
}

From source file:com.ibm.bi.dml.yarn.DMLAppMaster.java

License:Open Source License

/**
 * /*  w  w  w  . j av  a 2s . c o  m*/
 * @param args
 * @throws YarnException 
 * @throws IOException 
 */
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();

    //obtain application ID
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");

    //initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();

    //register with ResourceManager
    rmClient.registerApplicationMaster("", 0, ""); //host, port for rm communication
    LOG.debug("Registered the SystemML application master with resource manager");

    //start status reporter to ResourceManager
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");

    //set DMLscript app master context
    DMLScript.setActiveAM();

    //parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();

    //run SystemML CP
    FinalApplicationStatus status = null;
    try {
        //core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);

        if (success)
            status = FinalApplicationStatus.SUCCEEDED;
        else
            status = FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t"
                + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        //stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");

        //unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}

From source file:com.inforefiner.hdata.ApplicationMaster.java

License:Apache License

/**
 * Waits for the containers to complete, publishes the attempt-end event when a timeline
 * client is present, joins the launch threads, stops the NM client, unregisters from the RM
 * and stops the remaining clients.
 *
 * @return {@code true} when the application was reported as SUCCEEDED, {@code false} when
 *         it was reported as FAILED
 */
@VisibleForTesting
protected boolean finish() {
    // Block (polling every 200 ms) while not done and containers are still outstanding.
    while (!(done || numCompletedContainers.get() == numTotalContainers)) {
        try {
            Thread.sleep(200);
        } catch (InterruptedException ignored) {
            // interruption is ignored here; the loop condition is simply checked again
        }
    }

    if (timelineClient != null) {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END,
                domainId, appSubmitterUgi);
    }

    // Wait (at most 10 seconds each) for the launch threads; needed when we
    // time out and have to release containers.
    for (Thread pendingLaunch : launchThreads) {
        try {
            pendingLaunch.join(10000);
        } catch (InterruptedException e) {
            LOG.info("Exception thrown in thread join: " + e.getMessage());
            e.printStackTrace();
        }
    }

    // The application is complete: stop all running containers.
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // The application is complete: signal finish to the RM.
    LOG.info("Application completed. Signalling finish to RM");

    // Success requires zero failed containers and all requested containers completed.
    final boolean success = numFailedContainers.get() == 0
            && numCompletedContainers.get() == numTotalContainers;
    final FinalApplicationStatus appStatus = success
            ? FinalApplicationStatus.SUCCEEDED
            : FinalApplicationStatus.FAILED;
    String appMessage = null;
    if (!success) {
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        LOG.info(appMessage);
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    amRMClient.stop();

    // Stop Timeline Client
    if (timelineClient != null) {
        timelineClient.stop();
    }

    return success;
}

From source file:com.sogou.dockeronyarn.client.DockerApplicationMaster_23.java

License:Apache License

/**
 * Joins the launch threads, stops the NM client and unregisters this application master
 * from the RM. The outcome is stored in the {@code success} field.
 */
private void finish() {
    // Wait up to 10 seconds per launch thread; needed when we time out
    // and have to release containers.
    for (Thread launcher : launchThreads) {
        try {
            launcher.join(10000);
        } catch (InterruptedException e) {
            LOG.info("Exception thrown in thread join: " + e.getMessage());
            e.printStackTrace();
        }
    }

    // The application is complete: stop all running containers.
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // The application is complete: signal shutdown to the RM.
    LOG.info("Application completed. Signalling shutdown to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;
    // Failure: at least one failed container, or not every requested container completed.
    if (numFailedContainers.get() != 0 || numCompletedContainers.get() != numTotalContainers) {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        success = false;
    } else {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    amRMClient.stop();
}

From source file:com.sogou.dockeronyarn.service.DockerApplicationMaster_24.java

License:Apache License

/**
 * Waits for the containers to complete, joins the launch threads, stops the NM client and
 * unregisters this application master from the RM.
 *
 * @return {@code true} when the application was reported as SUCCEEDED, {@code false} when
 *         it was reported as FAILED
 */
@VisibleForTesting
protected boolean finish() {
    // Poll every 200 ms until flagged done or all containers have completed.
    while (!(done || numCompletedContainers.get() == numTotalContainers)) {
        try {
            Thread.sleep(200);
        } catch (InterruptedException ignored) {
            // interruption is ignored here; the loop condition is simply checked again
        }
    }

    // Wait up to 10 seconds per launch thread; needed when we time out
    // and have to release containers.
    for (Thread worker : launchThreads) {
        try {
            worker.join(10000);
        } catch (InterruptedException e) {
            LOG.info("Exception thrown in thread join: " + e.getMessage());
            e.printStackTrace();
        }
    }

    // The application is complete: stop all running containers.
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // The application is complete: signal shutdown to the RM.
    LOG.info("Application completed. Signalling shutdown to RM");

    FinalApplicationStatus appStatus = FinalApplicationStatus.SUCCEEDED;
    String appMessage = null;
    boolean success = true;
    // Failure: at least one failed container, or not every requested container completed.
    if (numFailedContainers.get() != 0 || numCompletedContainers.get() != numTotalContainers) {
        appStatus = FinalApplicationStatus.FAILED;
        StringBuilder diagnostics = new StringBuilder("Diagnostics.");
        diagnostics.append(", total=").append(numTotalContainers);
        diagnostics.append(", completed=").append(numCompletedContainers.get());
        diagnostics.append(", allocated=").append(numAllocatedContainers.get());
        diagnostics.append(", failed=").append(numFailedContainers.get());
        appMessage = diagnostics.toString();
        success = false;
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    amRMClient.stop();

    return success;
}

From source file:com.tito.easyyarn.appmaster.ApplicationMaster.java

License:Apache License

/**
 * Drives the registered phases one after another, then shuts the application master down.
 *
 * <p>While neither {@code done} nor {@code hasCompleted()} holds, the loop:
 * <ul>
 *   <li>starts the first pending phase on its own thread, or sets {@code done} if no phase is
 *       registered;</li>
 *   <li>once the current phase reports a status other than {@code RUNNING}/{@code PENDING}:
 *       on {@code SUCCESSED} records it as completed and starts the next pending phase (or sets
 *       {@code done} when none remain); on any other status records it as failed and sets
 *       {@code done};</li>
 *   <li>sleeps one second between checks.</li>
 * </ul>
 * Afterwards it joins the phase threads, stops the NodeManager client, unregisters from the
 * ResourceManager with the final status, and stops the RM client.
 *
 * <p>If the calling thread is interrupted while sleeping or joining, processing continues as
 * before and the interrupt status is restored just before this method returns.
 *
 * @return the value of {@code hasCompletedSuccessfully()} as evaluated after shutdown of the
 *         containers; {@code true} maps to final status {@code SUCCEEDED}, {@code false} to
 *         {@code FAILED}
 */
@VisibleForTesting
protected boolean finish() {
    // Remember an interrupt instead of dropping it. The flag is restored only at the very end
    // so that the remaining (possibly blocking) shutdown calls are not cut short by it.
    boolean interrupted = false;
    try {
        // wait for completion.
        LOG.info("(!done " + (!done));
        LOG.info("!hasCompleted()" + (!hasCompleted()));
        while (!done && !hasCompleted()) {
            try {
                if (currentPhase == null) {
                    if (!pendingPhases.isEmpty()) {
                        currentPhase = pendingPhases.poll();
                        currentPhase.setPassedArguments(passedArguments);
                        Thread phaseThread = new Thread(currentPhase);
                        phaseThreads.add(phaseThread);
                        LOG.info("Starting First Phase:" + currentPhase.getId());
                        phaseThread.start();
                    } else {
                        LOG.error("NO Phases Registered , aborting phase execution");
                        done = true;
                    }
                    // Re-evaluate the loop condition immediately, without sleeping.
                    continue;
                }
                PhaseStatus currentPhaseStatus = currentPhase.getPhaseStatus();

                if (currentPhaseStatus != null && currentPhaseStatus != PhaseStatus.RUNNING
                        && currentPhaseStatus != PhaseStatus.PENDING) {
                    LOG.info("currentPhase.getPhaseStatus()" + currentPhaseStatus);
                    if (currentPhaseStatus == PhaseStatus.SUCCESSED) {
                        LOG.info("Phase Completed successfully : " + currentPhase.getId());
                        completedPhases.add(currentPhase);
                        // check to see if any pending phases start them
                        if (pendingPhases.isEmpty()) {
                            LOG.info("No More Phases remaining");
                            done = true;
                        } else {
                            currentPhase = pendingPhases.poll();
                            currentPhase.setPassedArguments(passedArguments);
                            Thread phaseThread = new Thread(currentPhase);
                            phaseThreads.add(phaseThread);
                            phaseThread.start();
                        }
                    }
                    // phase failed
                    else {
                        failedPhases.add(currentPhase);
                        done = true;
                    }
                }
                Thread.sleep(1000);
            } catch (InterruptedException ex) {
                // Keep driving the phases, but do not lose the fact that we were interrupted.
                interrupted = true;
            }
        }

        // Join all launched threads
        // needed for when we time out
        // and we need to release containers
        for (Thread phaseThread : phaseThreads) {
            try {
                phaseThread.join(10000);
            } catch (InterruptedException e) {
                interrupted = true;
                // Hand the exception to the logger rather than printing the trace to stderr.
                LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
            }
        }

        // When the application completes, it should stop all running containers
        LOG.info("Application completed. Stopping running containers");
        nmClientAsync.stop();

        // When the application completes, it should send a finish application
        // signal to the RM
        LOG.info("Application completed. Signalling finish to RM");

        FinalApplicationStatus appStatus;
        String appMessage = null;
        // Evaluate once so the logged value is exactly the one the final status is based on.
        boolean success = hasCompletedSuccessfully();
        LOG.info("hasCompletedSuccessfully():" + success);
        LOG.info("completedPhases.size():" + completedPhases.size());
        LOG.info("phaseList.size():" + phaseList.size());
        LOG.info("+failedPhases.size():" + failedPhases.size());
        if (success) {
            appStatus = FinalApplicationStatus.SUCCEEDED;
        } else {
            appStatus = FinalApplicationStatus.FAILED;
            appMessage = "Diagnostics." + ", total Phases=" + phaseList.size() + ", completed="
                    + completedPhases.size() + ", failed=" + failedPhases.size();
        }
        try {
            amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
        } catch (YarnException ex) {
            LOG.error("Failed to unregister application", ex);
        } catch (IOException e) {
            LOG.error("Failed to unregister application", e);
        }

        amRMClient.stop();

        return success;
    } finally {
        if (interrupted) {
            // Let callers further up the stack observe the interruption.
            Thread.currentThread().interrupt();
        }
    }
}