List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED
FinalApplicationStatus SUCCEEDED
To view the source code for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED, click the Source Link.
From source file:org.apache.reef.runtime.yarn.driver.unmanaged.UnmanagedAmTest.java
License:Apache License
@Test public void testAmShutdown() throws IOException, YarnException { Assume.assumeTrue("This test requires a YARN Resource Manager to connect to", Boolean.parseBoolean(System.getenv("REEF_TEST_YARN"))); final YarnConfiguration yarnConfig = new YarnConfiguration(); // Start YARN client and register the application final YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(yarnConfig);/*from w ww . j a va 2 s .co m*/ yarnClient.start(); final ContainerLaunchContext containerContext = Records.newRecord(ContainerLaunchContext.class); containerContext.setCommands(Collections.<String>emptyList()); containerContext.setLocalResources(Collections.<String, LocalResource>emptyMap()); containerContext.setEnvironment(Collections.<String, String>emptyMap()); containerContext.setTokens(getTokens()); final ApplicationSubmissionContext appContext = yarnClient.createApplication() .getApplicationSubmissionContext(); appContext.setApplicationName("REEF_Unmanaged_AM_Test"); appContext.setAMContainerSpec(containerContext); appContext.setUnmanagedAM(true); appContext.setQueue("default"); final ApplicationId applicationId = appContext.getApplicationId(); LOG.log(Level.INFO, "Registered YARN application: {0}", applicationId); yarnClient.submitApplication(appContext); LOG.log(Level.INFO, "YARN application submitted: {0}", applicationId); addToken(yarnClient.getAMRMToken(applicationId)); // Start the AM final AMRMClientAsync<AMRMClient.ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(1000, this); rmClient.init(yarnConfig); rmClient.start(); final NMClientAsync nmClient = new NMClientAsyncImpl(this); nmClient.init(yarnConfig); nmClient.start(); final RegisterApplicationMasterResponse registration = rmClient .registerApplicationMaster(NetUtils.getHostname(), -1, null); LOG.log(Level.INFO, "Unmanaged AM is running: {0}", registration); rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "Success!", null); LOG.log(Level.INFO, 
"Unregistering AM: state {0}", rmClient.getServiceState()); // Shutdown the AM rmClient.stop(); nmClient.stop(); // Get the final application report final ApplicationReport appReport = yarnClient.getApplicationReport(applicationId); final YarnApplicationState appState = appReport.getYarnApplicationState(); final FinalApplicationStatus finalAttemptStatus = appReport.getFinalApplicationStatus(); LOG.log(Level.INFO, "Application {0} final attempt {1} status: {2}/{3}", new Object[] { applicationId, appReport.getCurrentApplicationAttemptId(), appState, finalAttemptStatus }); Assert.assertEquals("Application must be in FINISHED state", YarnApplicationState.FINISHED, appState); Assert.assertEquals("Final status must be SUCCEEDED", FinalApplicationStatus.SUCCEEDED, finalAttemptStatus); // Shutdown YARN client yarnClient.stop(); }
From source file:org.apache.reef.runtime.yarn.driver.YarnContainerManager.java
License:Apache License
void onStop() { LOG.log(Level.FINE, "Stop Runtime: RM status {0}", this.resourceManager.getServiceState()); if (this.resourceManager.getServiceState() == Service.STATE.STARTED) { // invariant: if RM is still running then we declare success. try {//from w ww. ja va2 s . c o m this.reefEventHandlers.close(); this.resourceManager.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null); this.resourceManager.close(); } catch (final Exception e) { LOG.log(Level.WARNING, "Error shutting down YARN application", e); } } if (this.nodeManager.getServiceState() == Service.STATE.STARTED) { try { this.nodeManager.close(); } catch (final IOException e) { LOG.log(Level.WARNING, "Error closing YARN Node Manager", e); } } }
From source file:org.apache.samza.job.yarn.SamzaTaskManager.java
License:Apache License
/** * This methods handles the onContainerCompleted callback from the RM. Based on the ContainerExitStatus, it decides * whether a container that exited is marked as complete or failure. *///from w w w . ja v a 2 s .co m @Override public void onContainerCompleted(ContainerStatus containerStatus) { String containerIdStr = ConverterUtils.toString(containerStatus.getContainerId()); int containerId = -1; for (Map.Entry<Integer, YarnContainer> entry : state.runningContainers.entrySet()) { if (entry.getValue().id().equals(containerStatus.getContainerId())) { containerId = entry.getKey(); break; } } state.runningContainers.remove(containerId); int exitStatus = containerStatus.getExitStatus(); switch (exitStatus) { case ContainerExitStatus.SUCCESS: log.info("Container {} completed successfully.", containerIdStr); state.completedContainers.incrementAndGet(); if (containerId != -1) { state.finishedContainers.add(containerId); containerFailures.remove(containerId); } if (state.completedContainers.get() == state.containerCount) { log.info("Setting job status to SUCCEEDED, since all containers have been marked as completed."); state.status = FinalApplicationStatus.SUCCEEDED; } break; case ContainerExitStatus.DISKS_FAILED: case ContainerExitStatus.ABORTED: case ContainerExitStatus.PREEMPTED: log.info( "Got an exit code of {}. This means that container {} was " + "killed by YARN, either due to being released by the application " + "master or being 'lost' due to node failures etc. or due to preemption by the RM", exitStatus, containerIdStr); state.releasedContainers.incrementAndGet(); // If this container was assigned some partitions (a containerId), then // clean up, and request a new container for the tasks. This only // should happen if the container was 'lost' due to node failure, not // if the AM released the container. if (containerId != -1) { log.info( "Released container {} was assigned task group ID {}. 
Requesting a new container for the task group.", containerIdStr, containerId); state.neededContainers.incrementAndGet(); state.jobHealthy.set(false); // request a container on new host containerAllocator.requestContainer(containerId, ContainerAllocator.ANY_HOST); } break; default: // TODO: Handle failure more intelligently. Should track NodeFailures! log.info("Container failed for some reason. Let's start it again"); log.info("Container " + containerIdStr + " failed with exit code " + exitStatus + " - " + containerStatus.getDiagnostics()); state.failedContainers.incrementAndGet(); state.failedContainersStatus.put(containerIdStr, containerStatus); state.jobHealthy.set(false); if (containerId != -1) { state.neededContainers.incrementAndGet(); // Find out previously running container location String lastSeenOn = state.jobCoordinator.jobModel().getContainerToHostValue(containerId, SetContainerHostMapping.HOST_KEY); if (!hostAffinityEnabled || lastSeenOn == null) { lastSeenOn = ContainerAllocator.ANY_HOST; } // A container failed for an unknown reason. Let's check to see if // we need to shutdown the whole app master if too many container // failures have happened. The rules for failing are that the // failure count for a task group id must be > the configured retry // count, and the last failure (the one prior to this one) must have // happened less than retry window ms ago. If retry count is set to // 0, the app master will fail on any container failure. If the // retry count is set to a number < 0, a container failure will // never trigger an app master failure. 
int retryCount = yarnConfig.getContainerRetryCount(); int retryWindowMs = yarnConfig.getContainerRetryWindowMs(); if (retryCount == 0) { log.error( "Container ID {} ({}) failed, and retry count is set to 0, so shutting down the application master, and marking the job as failed.", containerId, containerIdStr); tooManyFailedContainers = true; } else if (retryCount > 0) { int currentFailCount; long lastFailureTime; if (containerFailures.containsKey(containerId)) { ContainerFailure failure = containerFailures.get(containerId); currentFailCount = failure.getCount() + 1; lastFailureTime = failure.getLastFailure(); } else { currentFailCount = 1; lastFailureTime = 0L; } if (currentFailCount >= retryCount) { long lastFailureMsDiff = System.currentTimeMillis() - lastFailureTime; if (lastFailureMsDiff < retryWindowMs) { log.error("Container ID " + containerId + "(" + containerIdStr + ") has failed " + currentFailCount + " times, with last failure " + lastFailureMsDiff + "ms ago. This is greater than retry count of " + retryCount + " and window of " + retryWindowMs + "ms , so shutting down the application master, and marking the job as failed."); // We have too many failures, and we're within the window // boundary, so reset shut down the app master. tooManyFailedContainers = true; state.status = FinalApplicationStatus.FAILED; } else { log.info( "Resetting fail count for container ID {} back to 1, since last container failure ({}) for " + "this container ID was outside the bounds of the retry window.", containerId, containerIdStr); // Reset counter back to 1, since the last failure for this // container happened outside the window boundary. 
containerFailures.put(containerId, new ContainerFailure(1, System.currentTimeMillis())); } } else { log.info("Current fail count for container ID {} is {}.", containerId, currentFailCount); containerFailures.put(containerId, new ContainerFailure(currentFailCount, System.currentTimeMillis())); } } if (!tooManyFailedContainers) { // Request a new container containerAllocator.requestContainer(containerId, lastSeenOn); } } } }
From source file:org.apache.slider.client.SliderClient.java
License:Apache License
/** * Build an exit code for an application from its report. * If the report parameter is null, its interpreted as a timeout * @param report report application report * @return the exit code/*from ww w. j a va 2 s .co m*/ * @throws IOException * @throws YarnException */ private int buildExitCode(ApplicationReport report) throws IOException, YarnException { if (null == report) { return EXIT_TIMED_OUT; } YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); switch (state) { case FINISHED: if (FinalApplicationStatus.SUCCEEDED == dsStatus) { log.info("Application has completed successfully"); return EXIT_SUCCESS; } else { log.info("Application finished unsuccessfully." + "YarnState = {}, DSFinalStatus = {} Breaking monitoring loop", state, dsStatus); return EXIT_YARN_SERVICE_FINISHED_WITH_ERROR; } case KILLED: log.info("Application did not finish. YarnState={}, DSFinalStatus={}", state, dsStatus); return EXIT_YARN_SERVICE_KILLED; case FAILED: log.info("Application Failed. YarnState={}, DSFinalStatus={}", state, dsStatus); return EXIT_YARN_SERVICE_FAILED; default: //not in any of these states return EXIT_SUCCESS; } }
From source file:org.apache.slider.server.appmaster.rpc.SliderIPCService.java
License:Apache License
@Override //SliderClusterProtocol public Messages.StopClusterResponseProto stopCluster(Messages.StopClusterRequestProto request) throws IOException, YarnException { onRpcCall("stop"); String message = request.getMessage(); if (message == null) { message = "application stopped by client"; }/*from w ww . j a v a 2 s . c o m*/ ActionStopSlider stopSlider = new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS, LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, message); log.info("SliderAppMasterApi.stopCluster: {}", stopSlider); schedule(stopSlider); return Messages.StopClusterResponseProto.getDefaultInstance(); }
From source file:org.apache.slider.server.appmaster.rpc.SliderIPCService.java
License:Apache License
@Override //SliderClusterProtocol public Messages.UpgradeContainersResponseProto upgradeContainers(Messages.UpgradeContainersRequestProto request) throws IOException, YarnException { onRpcCall("upgrade"); String message = request.getMessage(); if (message == null) { message = "application containers upgraded by client"; }/*from w w w. j av a2 s . com*/ ActionUpgradeContainers upgradeContainers = new ActionUpgradeContainers("Upgrade containers", 1000, TimeUnit.MILLISECONDS, LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, request.getContainerList(), request.getComponentList(), message); log.info("SliderAppMasterApi.upgradeContainers: {}", upgradeContainers); schedule(upgradeContainers); return Messages.UpgradeContainersResponseProto.getDefaultInstance(); }
From source file:org.apache.slider.server.appmaster.SliderAppMaster.java
License:Apache License
/** * RM wants to shut down the AM/*from w ww. j av a2 s . co m*/ */ @Override //AMRMClientAsync public void onShutdownRequest() { LOG_YARN.info("Shutdown Request received"); signalAMComplete(new ActionStopSlider("stop", EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, "Shutdown requested from RM")); }
From source file:org.apache.slider.server.appmaster.SliderAppMaster.java
License:Apache License
@Override //SliderClusterProtocol public Messages.StopClusterResponseProto stopCluster(Messages.StopClusterRequestProto request) throws IOException, YarnException { onRpcCall("stopCluster()"); String message = request.getMessage(); if (message == null) { message = "application frozen by client"; }// ww w.jav a 2 s . c o m ActionStopSlider stopSlider = new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS, LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, message); log.info("SliderAppMasterApi.stopCluster: {}", stopSlider); schedule(stopSlider); return Messages.StopClusterResponseProto.getDefaultInstance(); }
From source file:org.apache.slider.server.appmaster.web.rest.application.actions.RestActionStop.java
License:Apache License
public StopResponse stop(HttpServletRequest request, UriInfo uriInfo, String body) { String verb = request.getMethod(); log.info("Ping {}", verb); StopResponse response = new StopResponse(); response.verb = verb;// w w w . ja v a 2s . c o m long time = System.currentTimeMillis(); String text = String.format(Locale.ENGLISH, "Stopping action %s received at %tc", verb, time); response.text = text; log.info(text); ActionStopSlider stopSlider = new ActionStopSlider(text, 1000, TimeUnit.MILLISECONDS, LauncherExitCodes.EXIT_SUCCESS, FinalApplicationStatus.SUCCEEDED, text); log.info("SliderAppMasterApi.stopCluster: {}", stopSlider); slider.getQueues().schedule(stopSlider); return response; }
From source file:org.apache.sysml.yarn.DMLAppMaster.java
License:Apache License
public void runApplicationMaster(String[] args) throws YarnException, IOException { _conf = new YarnConfiguration(); //obtain application ID String containerIdString = System.getenv(Environment.CONTAINER_ID.name()); ContainerId containerId = ConverterUtils.toContainerId(containerIdString); _appId = containerId.getApplicationAttemptId().getApplicationId(); LOG.info("SystemML appplication master (applicationID: " + _appId + ")"); //initialize clients to ResourceManager AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(_conf);/*from w w w. j a v a 2 s . c o m*/ rmClient.start(); //register with ResourceManager rmClient.registerApplicationMaster("", 0, ""); //host, port for rm communication LOG.debug("Registered the SystemML application master with resource manager"); //start status reporter to ResourceManager DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000); reporter.start(); LOG.debug("Started status reporter (heartbeat to resource manager)"); //set DMLscript app master context DMLScript.setActiveAM(); //parse input arguments String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs(); //run SystemML CP FinalApplicationStatus status = null; try { //core dml script execution (equivalent to non-AM runtime) boolean success = DMLScript.executeScript(_conf, otherArgs); if (success) status = FinalApplicationStatus.SUCCEEDED; else status = FinalApplicationStatus.FAILED; } catch (DMLScriptException ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage()); status = FinalApplicationStatus.FAILED; writeMessageToHDFSWorkingDir(ex.getMessage()); } catch (Exception ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex); status = FinalApplicationStatus.FAILED; } finally { //stop periodic status reports reporter.stopStatusReporter(); LOG.debug("Stopped status reporter"); //unregister resource 
manager client rmClient.unregisterApplicationMaster(status, "", ""); LOG.debug("Unregistered the SystemML application master"); } }