Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED.

Prototype

FinalApplicationStatus SUCCEEDED

To view the source code for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED, click the Source Link.

Document

Application which finished successfully.

Usage

From source file:org.apache.reef.runtime.yarn.driver.unmanaged.UnmanagedAmTest.java

License:Apache License

/**
 * Runs a complete unmanaged Application Master lifecycle against a YARN Resource Manager:
 * submits an application flagged as an unmanaged AM, registers the AM, immediately
 * unregisters it with {@code FinalApplicationStatus.SUCCEEDED}, and then checks that the
 * application report shows the FINISHED state with a SUCCEEDED final status.
 *
 * <p>The test is skipped unless the {@code REEF_TEST_YARN} environment variable parses as
 * {@code true}.
 *
 * @throws IOException   declared for the YARN client calls made by this test
 * @throws YarnException declared for the YARN client calls made by this test
 */
@Test
public void testAmShutdown() throws IOException, YarnException {

    // Skip (rather than fail) when no Resource Manager is available for the test.
    Assume.assumeTrue("This test requires a YARN Resource Manager to connect to",
            Boolean.parseBoolean(System.getenv("REEF_TEST_YARN")));

    final YarnConfiguration yarnConfig = new YarnConfiguration();

    // Start YARN client and register the application

    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(yarnConfig);
    yarnClient.start();

    // The launch context is deliberately empty (no commands, resources or environment);
    // only the tokens returned by getTokens() are attached.
    final ContainerLaunchContext containerContext = Records.newRecord(ContainerLaunchContext.class);
    containerContext.setCommands(Collections.<String>emptyList());
    containerContext.setLocalResources(Collections.<String, LocalResource>emptyMap());
    containerContext.setEnvironment(Collections.<String, String>emptyMap());
    containerContext.setTokens(getTokens());

    final ApplicationSubmissionContext appContext = yarnClient.createApplication()
            .getApplicationSubmissionContext();
    appContext.setApplicationName("REEF_Unmanaged_AM_Test");
    appContext.setAMContainerSpec(containerContext);
    appContext.setUnmanagedAM(true);
    appContext.setQueue("default");

    final ApplicationId applicationId = appContext.getApplicationId();
    LOG.log(Level.INFO, "Registered YARN application: {0}", applicationId);

    yarnClient.submitApplication(appContext);

    LOG.log(Level.INFO, "YARN application submitted: {0}", applicationId);

    // Hand the AM-RM token of the submitted application to addToken() before the AM clients
    // are started. NOTE(review): addToken() is defined elsewhere - presumably it installs
    // the token for the current user; confirm.
    addToken(yarnClient.getAMRMToken(applicationId));

    // Start the AM

    // This test instance is passed as the callback handler of both async clients.
    // NOTE(review): 1000 is presumably the heartbeat interval in milliseconds - confirm
    // against the AMRMClientAsync API.
    final AMRMClientAsync<AMRMClient.ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(1000,
            this);
    rmClient.init(yarnConfig);
    rmClient.start();

    final NMClientAsync nmClient = new NMClientAsyncImpl(this);
    nmClient.init(yarnConfig);
    nmClient.start();

    // Register with the local host name, -1 and null.
    // NOTE(review): these look like the AM host, RPC port and tracking URL - confirm.
    final RegisterApplicationMasterResponse registration = rmClient
            .registerApplicationMaster(NetUtils.getHostname(), -1, null);

    LOG.log(Level.INFO, "Unmanaged AM is running: {0}", registration);

    // Unregister right away, reporting success; no containers are requested in this test.
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "Success!", null);

    LOG.log(Level.INFO, "Unregistering AM: state {0}", rmClient.getServiceState());

    // Shutdown the AM

    rmClient.stop();
    nmClient.stop();

    // Get the final application report

    final ApplicationReport appReport = yarnClient.getApplicationReport(applicationId);
    final YarnApplicationState appState = appReport.getYarnApplicationState();
    final FinalApplicationStatus finalAttemptStatus = appReport.getFinalApplicationStatus();

    LOG.log(Level.INFO, "Application {0} final attempt {1} status: {2}/{3}", new Object[] { applicationId,
            appReport.getCurrentApplicationAttemptId(), appState, finalAttemptStatus });

    Assert.assertEquals("Application must be in FINISHED state", YarnApplicationState.FINISHED, appState);
    Assert.assertEquals("Final status must be SUCCEEDED", FinalApplicationStatus.SUCCEEDED, finalAttemptStatus);

    // Shutdown YARN client
    // NOTE(review): none of the clients are stopped if an earlier call throws or an
    // assertion fails, because there is no try/finally around the body.

    yarnClient.stop();
}

From source file:org.apache.reef.runtime.yarn.driver.YarnContainerManager.java

License:Apache License

/**
 * Stops the YARN runtime: shuts down the Resource Manager client (unregistering the
 * application as SUCCEEDED) and then the Node Manager client, each only if it is still in
 * the STARTED state. Failures during shutdown are logged as warnings and not rethrown.
 */
void onStop() {

    LOG.log(Level.FINE, "Stop Runtime: RM status {0}", this.resourceManager.getServiceState());

    final boolean rmStillRunning = Service.STATE.STARTED == this.resourceManager.getServiceState();
    if (rmStillRunning) {
        // An RM client that is still running at this point is treated as a successful run,
        // hence the SUCCEEDED final status.
        try {
            this.reefEventHandlers.close();
            this.resourceManager.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
            this.resourceManager.close();
        } catch (final Exception shutdownError) {
            LOG.log(Level.WARNING, "Error shutting down YARN application", shutdownError);
        }
    }

    // The NM state is deliberately read only after the RM shutdown above has been attempted.
    final boolean nmStillRunning = Service.STATE.STARTED == this.nodeManager.getServiceState();
    if (nmStillRunning) {
        try {
            this.nodeManager.close();
        } catch (final IOException closeError) {
            LOG.log(Level.WARNING, "Error closing YARN Node Manager", closeError);
        }
    }
}

From source file:org.apache.samza.job.yarn.SamzaTaskManager.java

License:Apache License

/**
 * This method handles the onContainerCompleted callback from the RM. Based on the ContainerExitStatus, it decides
 * whether a container that exited is marked as complete or failure.
 *
 * <ul>
 *   <li>{@code SUCCESS}: counts the container as completed and, once the completed count equals
 *       {@code state.containerCount}, sets the job status to SUCCEEDED.</li>
 *   <li>{@code DISKS_FAILED}, {@code ABORTED}, {@code PREEMPTED}: counts the container as released and, if it
 *       was a known running container, requests a replacement on any host.</li>
 *   <li>Any other exit status: counts the container as failed, applies the retry-count / retry-window policy,
 *       and requests a replacement unless {@code tooManyFailedContainers} is set.</li>
 * </ul>
 *
 * @param containerStatus status of the container that exited; its container id, exit status and diagnostics
 *                        are read
 */
@Override
public void onContainerCompleted(ContainerStatus containerStatus) {
    String containerIdStr = ConverterUtils.toString(containerStatus.getContainerId());
    // Map the YARN container back to the integer key it is stored under in state.runningContainers;
    // -1 means no running container matched.
    int containerId = -1;
    for (Map.Entry<Integer, YarnContainer> entry : state.runningContainers.entrySet()) {
        if (entry.getValue().id().equals(containerStatus.getContainerId())) {
            containerId = entry.getKey();
            break;
        }
    }
    // Called unconditionally, i.e. also with -1 when no match was found.
    state.runningContainers.remove(containerId);

    int exitStatus = containerStatus.getExitStatus();
    switch (exitStatus) {
    case ContainerExitStatus.SUCCESS:
        log.info("Container {} completed successfully.", containerIdStr);

        state.completedContainers.incrementAndGet();

        if (containerId != -1) {
            state.finishedContainers.add(containerId);
            // A clean exit clears any failure history recorded for this container ID.
            containerFailures.remove(containerId);
        }

        if (state.completedContainers.get() == state.containerCount) {
            log.info("Setting job status to SUCCEEDED, since all containers have been marked as completed.");
            state.status = FinalApplicationStatus.SUCCEEDED;
        }
        break;

    case ContainerExitStatus.DISKS_FAILED:
    case ContainerExitStatus.ABORTED:
    case ContainerExitStatus.PREEMPTED:
        log.info(
                "Got an exit code of {}. This means that container {} was "
                        + "killed by YARN, either due to being released by the application "
                        + "master or being 'lost' due to node failures etc. or due to preemption by the RM",
                exitStatus, containerIdStr);

        state.releasedContainers.incrementAndGet();

        // If this container was assigned some partitions (a containerId), then
        // clean up, and request a new container for the tasks. This only
        // should happen if the container was 'lost' due to node failure, not
        // if the AM released the container.
        if (containerId != -1) {
            log.info(
                    "Released container {} was assigned task group ID {}. Requesting a new container for the task group.",
                    containerIdStr, containerId);

            state.neededContainers.incrementAndGet();
            state.jobHealthy.set(false);

            // request a container on new host
            containerAllocator.requestContainer(containerId, ContainerAllocator.ANY_HOST);
        }
        break;

    default:
        // TODO: Handle failure more intelligently. Should track NodeFailures!
        log.info("Container failed for some reason. Let's start it again");
        log.info("Container " + containerIdStr + " failed with exit code " + exitStatus + " - "
                + containerStatus.getDiagnostics());

        state.failedContainers.incrementAndGet();
        state.failedContainersStatus.put(containerIdStr, containerStatus);
        state.jobHealthy.set(false);

        if (containerId != -1) {
            state.neededContainers.incrementAndGet();
            // Find out previously running container location
            String lastSeenOn = state.jobCoordinator.jobModel().getContainerToHostValue(containerId,
                    SetContainerHostMapping.HOST_KEY);
            // Fall back to any host when host affinity is disabled or no previous host is recorded.
            if (!hostAffinityEnabled || lastSeenOn == null) {
                lastSeenOn = ContainerAllocator.ANY_HOST;
            }
            // A container failed for an unknown reason. Let's check to see if
            // we need to shutdown the whole app master if too many container
            // failures have happened. The rules for failing are that the
            // failure count for a task group id must be > the configured retry
            // count, and the last failure (the one prior to this one) must have
            // happened less than retry window ms ago. If retry count is set to
            // 0, the app master will fail on any container failure. If the
            // retry count is set to a number < 0, a container failure will
            // never trigger an app master failure.
            // NOTE(review): the comparison below is ">=", not ">" as stated above - confirm which is intended.
            int retryCount = yarnConfig.getContainerRetryCount();
            int retryWindowMs = yarnConfig.getContainerRetryWindowMs();

            if (retryCount == 0) {
                log.error(
                        "Container ID {} ({}) failed, and retry count is set to 0, so shutting down the application master, and marking the job as failed.",
                        containerId, containerIdStr);

                // NOTE(review): unlike the retry-window branch below, state.status is not set to FAILED
                // here even though the message says the job is marked as failed - confirm it is set elsewhere.
                tooManyFailedContainers = true;
            } else if (retryCount > 0) {
                // Failure count including this failure, and the time of the failure before this one
                // (0 when there is no recorded failure).
                int currentFailCount;
                long lastFailureTime;
                if (containerFailures.containsKey(containerId)) {
                    ContainerFailure failure = containerFailures.get(containerId);
                    currentFailCount = failure.getCount() + 1;
                    lastFailureTime = failure.getLastFailure();
                } else {
                    currentFailCount = 1;
                    lastFailureTime = 0L;
                }
                if (currentFailCount >= retryCount) {
                    long lastFailureMsDiff = System.currentTimeMillis() - lastFailureTime;

                    if (lastFailureMsDiff < retryWindowMs) {
                        log.error("Container ID " + containerId + "(" + containerIdStr + ") has failed "
                                + currentFailCount + " times, with last failure " + lastFailureMsDiff
                                + "ms ago. This is greater than retry count of " + retryCount
                                + " and window of " + retryWindowMs
                                + "ms , so shutting down the application master, and marking the job as failed.");

                        // We have too many failures, and we're within the window
                        // boundary, so shut down the app master.
                        tooManyFailedContainers = true;
                        state.status = FinalApplicationStatus.FAILED;
                    } else {
                        log.info(
                                "Resetting fail count for container ID {} back to 1, since last container failure ({}) for "
                                        + "this container ID was outside the bounds of the retry window.",
                                containerId, containerIdStr);

                        // Reset counter back to 1, since the last failure for this
                        // container happened outside the window boundary.
                        containerFailures.put(containerId, new ContainerFailure(1, System.currentTimeMillis()));
                    }
                } else {
                    log.info("Current fail count for container ID {} is {}.", containerId, currentFailCount);
                    containerFailures.put(containerId,
                            new ContainerFailure(currentFailCount, System.currentTimeMillis()));
                }
            }

            // tooManyFailedContainers is a field, so a value set by an earlier callback also
            // suppresses the replacement request here.
            if (!tooManyFailedContainers) {
                // Request a new container
                containerAllocator.requestContainer(containerId, lastSeenOn);
            }
        }

    }
}

From source file:org.apache.slider.client.SliderClient.java

License:Apache License

/**
 * Build an exit code for an application from its report.
 * If the report parameter is null, its interpreted as a timeout
 * @param report report application report
 * @return the exit code/*from   ww w. j a  va  2 s  .co m*/
 * @throws IOException
 * @throws YarnException
 */
private int buildExitCode(ApplicationReport report) throws IOException, YarnException {
    if (null == report) {
        return EXIT_TIMED_OUT;
    }

    YarnApplicationState state = report.getYarnApplicationState();
    FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
    switch (state) {
    case FINISHED:
        if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
            log.info("Application has completed successfully");
            return EXIT_SUCCESS;
        } else {
            log.info("Application finished unsuccessfully."
                    + "YarnState = {}, DSFinalStatus = {} Breaking monitoring loop", state, dsStatus);
            return EXIT_YARN_SERVICE_FINISHED_WITH_ERROR;
        }

    case KILLED:
        log.info("Application did not finish. YarnState={}, DSFinalStatus={}", state, dsStatus);
        return EXIT_YARN_SERVICE_KILLED;

    case FAILED:
        log.info("Application Failed. YarnState={}, DSFinalStatus={}", state, dsStatus);
        return EXIT_YARN_SERVICE_FAILED;

    default:
        //not in any of these states
        return EXIT_SUCCESS;
    }
}

From source file:org.apache.slider.server.appmaster.rpc.SliderIPCService.java

License:Apache License

/**
 * Handles a stop-cluster RPC: schedules an {@code ActionStopSlider} that reports
 * {@code EXIT_SUCCESS} / {@code FinalApplicationStatus.SUCCEEDED} and returns the default
 * response instance.
 *
 * @param request stop request; its message is used as the stop reason, with a fixed
 *                default text when the message is null
 * @return the default {@code StopClusterResponseProto} instance
 * @throws IOException declared by the protocol signature
 * @throws YarnException declared by the protocol signature
 */
@Override //SliderClusterProtocol
public Messages.StopClusterResponseProto stopCluster(Messages.StopClusterRequestProto request)
        throws IOException, YarnException {
    onRpcCall("stop");

    final String requested = request.getMessage();
    final String message = (requested != null) ? requested : "application stopped by client";

    // The same text is passed both as the first constructor argument and as the final one.
    final ActionStopSlider stopAction = new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS,
            LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, message);
    log.info("SliderAppMasterApi.stopCluster: {}", stopAction);
    schedule(stopAction);

    return Messages.StopClusterResponseProto.getDefaultInstance();
}

From source file:org.apache.slider.server.appmaster.rpc.SliderIPCService.java

License:Apache License

/**
 * Handles an upgrade-containers RPC: schedules an {@code ActionUpgradeContainers} built
 * from the request's container and component lists and returns the default response
 * instance.
 *
 * @param request upgrade request; supplies the container list, the component list and an
 *                optional message (a fixed default text is used when the message is null)
 * @return the default {@code UpgradeContainersResponseProto} instance
 * @throws IOException declared by the protocol signature
 * @throws YarnException declared by the protocol signature
 */
@Override //SliderClusterProtocol
public Messages.UpgradeContainersResponseProto upgradeContainers(Messages.UpgradeContainersRequestProto request)
        throws IOException, YarnException {
    onRpcCall("upgrade");

    final String requested = request.getMessage();
    final String message = (requested != null) ? requested : "application containers upgraded by client";

    final ActionUpgradeContainers upgradeAction = new ActionUpgradeContainers("Upgrade containers", 1000,
            TimeUnit.MILLISECONDS, LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED,
            request.getContainerList(), request.getComponentList(), message);
    log.info("SliderAppMasterApi.upgradeContainers: {}", upgradeAction);
    schedule(upgradeAction);

    return Messages.UpgradeContainersResponseProto.getDefaultInstance();
}

From source file:org.apache.slider.server.appmaster.SliderAppMaster.java

License:Apache License

/**
 * RM wants to shut down the AM/*from  w ww. j av  a2 s  . co  m*/
 */
@Override //AMRMClientAsync
public void onShutdownRequest() {
    LOG_YARN.info("Shutdown Request received");
    signalAMComplete(new ActionStopSlider("stop", EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED,
            "Shutdown requested from RM"));
}

From source file:org.apache.slider.server.appmaster.SliderAppMaster.java

License:Apache License

/**
 * Handles a stop-cluster RPC on the application master: schedules an
 * {@code ActionStopSlider} that reports {@code EXIT_SUCCESS} /
 * {@code FinalApplicationStatus.SUCCEEDED} and returns the default response instance.
 *
 * @param request stop request; its message is used as the stop reason, with a fixed
 *                default text when the message is null
 * @return the default {@code StopClusterResponseProto} instance
 * @throws IOException declared by the protocol signature
 * @throws YarnException declared by the protocol signature
 */
@Override //SliderClusterProtocol
public Messages.StopClusterResponseProto stopCluster(Messages.StopClusterRequestProto request)
        throws IOException, YarnException {
    onRpcCall("stopCluster()");

    final String requested = request.getMessage();
    final String message = (requested == null) ? "application frozen by client" : requested;

    // The same text is passed both as the first constructor argument and as the final one.
    final ActionStopSlider stopAction = new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS,
            LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, message);
    log.info("SliderAppMasterApi.stopCluster: {}", stopAction);
    schedule(stopAction);

    return Messages.StopClusterResponseProto.getDefaultInstance();
}

From source file:org.apache.slider.server.appmaster.web.rest.application.actions.RestActionStop.java

License:Apache License

/**
 * Handles a REST stop request: builds a response echoing the HTTP verb and a timestamped
 * description, then schedules an {@code ActionStopSlider} on the slider queues that reports
 * {@code EXIT_SUCCESS} / {@code FinalApplicationStatus.SUCCEEDED}.
 *
 * @param request servlet request; only its HTTP method is read
 * @param uriInfo not used by this method
 * @param body    not used by this method
 * @return the response carrying the verb and the description text
 */
public StopResponse stop(HttpServletRequest request, UriInfo uriInfo, String body) {
    final String httpVerb = request.getMethod();
    log.info("Ping {}", httpVerb);

    final StopResponse response = new StopResponse();
    response.verb = httpVerb;

    // The same description is returned to the caller, logged, and used as the stop reason.
    final long receivedAt = System.currentTimeMillis();
    final String description = String.format(Locale.ENGLISH, "Stopping action %s received at %tc", httpVerb,
            receivedAt);
    response.text = description;
    log.info(description);

    final ActionStopSlider stopAction = new ActionStopSlider(description, 1000, TimeUnit.MILLISECONDS,
            LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, description);
    log.info("SliderAppMasterApi.stopCluster: {}", stopAction);
    slider.getQueues().schedule(stopAction);

    return response;
}

From source file:org.apache.sysml.yarn.DMLAppMaster.java

License:Apache License

/**
 * Runs the SystemML application master: registers with the ResourceManager, starts the
 * periodic status reporter, executes the DML script, and finally unregisters with a
 * final status reflecting the outcome of the script execution.
 *
 * <p>The final status is SUCCEEDED only when {@code DMLScript.executeScript} returns
 * {@code true}; in every other case (it returns {@code false} or anything is thrown)
 * the application is unregistered as FAILED.
 *
 * @param args command line arguments; generic Hadoop options are applied to the
 *             configuration and the remaining arguments are passed to the script execution
 * @throws YarnException declared for the ResourceManager client calls
 * @throws IOException declared for the ResourceManager client calls and argument parsing
 */
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();

    //obtain application ID
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");

    //initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();

    //register with ResourceManager
    rmClient.registerApplicationMaster("", 0, ""); //host, port for rm communication
    LOG.debug("Registered the SystemML application master with resource manager");

    //start status reporter to ResourceManager
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");

    //set DMLscript app master context
    DMLScript.setActiveAM();

    //parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();

    //run SystemML CP
    //default to FAILED so that the finally block never unregisters with a null status,
    //e.g. when an Error (which the catch clauses below do not handle) escapes the execution
    FinalApplicationStatus status = FinalApplicationStatus.FAILED;
    try {
        //core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);

        status = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        //status stays FAILED; the message is additionally written to the HDFS working directory
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t"
                + ex.getMessage());
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        //status stays FAILED
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
    } finally {
        //stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");

        //unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}