List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED
FinalApplicationStatus SUCCEEDED
To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED.
Click Source Link
From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/** * Clean up, whether or not we were successful. */// w ww .j ava 2 s .c o m private void finish() { // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); //from https://stackoverflow.com/questions/4812570/how-to-store-printstacktrace-into-a-string StringWriter errors = new StringWriter(); e.printStackTrace(new PrintWriter(errors)); LOG.error(errors.toString()); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { resourceManager.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } done = true; resourceManager.stop(); }
From source file:eu.stratosphere.yarn.ApplicationMaster.java
License:Apache License
private void run() throws Exception { //Utils.logFilesInCurrentDirectory(LOG); // Initialize clients to ResourceManager and NodeManagers Configuration conf = Utils.initializeYarnConfiguration(); FileSystem fs = FileSystem.get(conf); Map<String, String> envs = System.getenv(); final String currDir = envs.get(Environment.PWD.key()); final String logDirs = envs.get(Environment.LOG_DIRS.key()); final String ownHostname = envs.get(Environment.NM_HOST.key()); final String appId = envs.get(Client.ENV_APP_ID); final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR); final String applicationMasterHost = envs.get(Environment.NM_HOST.key()); final String remoteStratosphereJarPath = envs.get(Client.STRATOSPHERE_JAR_PATH); final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES); final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME); final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT)); final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY)); final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES)); int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); if (currDir == null) { throw new RuntimeException("Current directory unknown"); }//from w w w.j a v a 2 s . c om if (ownHostname == null) { throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set."); } LOG.info("Working directory " + currDir); // load Stratosphere configuration. Utils.getStratosphereConfiguration(currDir); final String localWebInterfaceDir = currDir + "/resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME; // Update yaml conf -> set jobManager address to this machine's address. FileInputStream fis = new FileInputStream(currDir + "/stratosphere-conf.yaml"); BufferedReader br = new BufferedReader(new InputStreamReader(fis)); Writer output = new BufferedWriter(new FileWriter(currDir + "/stratosphere-conf-modified.yaml")); String line; while ((line = br.readLine()) != null) { if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) { output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) { output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n"); } else { output.append(line + "\n"); } } // just to make sure. output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n"); output.close(); br.close(); File newConf = new File(currDir + "/stratosphere-conf-modified.yaml"); if (!newConf.exists()) { LOG.warn("modified yaml does not exist!"); } Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME, ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath()); JobManager jm; { String pathToNepheleConfig = currDir + "/stratosphere-conf-modified.yaml"; String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig }; // start the job manager jm = JobManager.initialize(args); // Start info server for jobmanager jm.startInfoServer(); } AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); NMClient nmClient = NMClient.createNMClient(); nmClient.init(conf); nmClient.start(); // Register with ResourceManager LOG.info("registering ApplicationMaster"); rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":" + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined")); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource stratosphereJar = Records.newRecord(LocalResource.class); LocalResource stratosphereConf = Records.newRecord(LocalResource.class); // register Stratosphere Jar with remote HDFS final Path remoteJarPath = new Path(remoteStratosphereJarPath); Utils.registerLocalResource(fs, remoteJarPath, stratosphereJar); // register conf with local fs. Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/stratosphere-conf-modified.yaml"), stratosphereConf, new Path(clientHomeDir)); LOG.info("Prepared localresource for modified yaml: " + stratosphereConf); boolean hasLog4j = new File(currDir + "/log4j.properties").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.STRATOSPHERE_JVM_OPTIONS, ""); // Obtain allocated containers and launch int allocatedContainers = 0; int completedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLog4j) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } tmCommand += " eu.stratosphere.yarn.YarnTaskManagerRunner -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); // copy resources to the TaskManagers. Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("stratosphere.jar", stratosphereJar); localResources.put("stratosphere-conf.yaml", stratosphereConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } ctx.setLocalResources(localResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add stratosphere.jar to class path. containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container" + e.getMessage()); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(100); } // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(5000); } LOG.info("Shutting down JobManager"); jm.shutdown(); // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); }
From source file:gobblin.yarn.YarnService.java
License:Apache License
@Override protected void shutDown() throws IOException { LOGGER.info("Stopping the YarnService"); this.shutdownInProgress = true; try {//from w w w . ja v a2s . c o m ExecutorsUtils.shutdownExecutorService(this.containerLaunchExecutor, Optional.of(LOGGER)); // Stop the running containers for (Map.Entry<Container, String> entry : this.containerMap.values()) { LOGGER.info(String.format("Stopping container %s running participant %s", entry.getKey().getId(), entry.getValue())); this.nmClientAsync.stopContainerAsync(entry.getKey().getId(), entry.getKey().getNodeId()); } if (!this.containerMap.isEmpty()) { synchronized (this.allContainersStopped) { try { // Wait 5 minutes for the containers to stop this.allContainersStopped.wait(5 * 60 * 1000); LOGGER.info("All of the containers have been stopped"); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } this.amrmClientAsync.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null); } catch (IOException | YarnException e) { LOGGER.error("Failed to unregister the ApplicationMaster", e); } finally { try { this.closer.close(); } finally { if (this.gobblinMetrics.isPresent()) { this.gobblinMetrics.get().stopMetricsReporting(); } } } }
From source file:husky.server.HuskyApplicationMaster.java
License:Apache License
private void run() throws YarnException, IOException, InterruptedException, ExecutionException { LOG.info("Run App Master"); mRMClientListener = new HuskyRMCallbackHandler(this); mRMClient = AMRMClientAsync.createAMRMClientAsync(1000, mRMClientListener); mRMClient.init(mYarnConf);// ww w. j av a 2 s . c om mRMClient.start(); mContainerListener = new HuskyNMCallbackHandler(); mNMClient = NMClientAsync.createNMClientAsync(mContainerListener); mNMClient.init(mYarnConf); mNMClient.start(); // Register with ResourceManager LOG.info("registerApplicationMaster started"); mRMClient.registerApplicationMaster("", 0, ""); LOG.info("registerApplicationMaster done"); // Ask RM to start `mNumContainer` containers, each is a worker node LOG.info("Ask RM for " + mWorkerInfos.size() + " containers"); for (Pair<String, Integer> i : mWorkerInfos) { mRMClient.addContainerRequest(setupContainerAskForRMSpecific(i.getFirst())); } FinalApplicationStatus status = mRMClientListener.getFinalNumSuccess() == mWorkerInfos.size() ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED; mRMClient.unregisterApplicationMaster(status, mRMClientListener.getStatusReport(), null); }
From source file:hws.core.JobMaster.java
License:Apache License
public void runMainLoop() throws Exception { AMRMClientAsync<ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(100, this); rmClient.init(getConfiguration());//from w w w. j a va 2 s .c om rmClient.start(); // Register with ResourceManager Logger.info("[AM] registerApplicationMaster 0"); rmClient.registerApplicationMaster("", 0, ""); Logger.info("[AM] registerApplicationMaster 1"); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(128); capability.setVirtualCores(1); final CountDownLatch doneLatch = new CountDownLatch(this.modulePipeline.size()); // Make container requests to ResourceManager for (ModuleInfo moduleInfo : this.modulePipeline) { //create containers for each instance of each module zk.createPersistent("/hadoop-watershed/" + this.appIdStr + "/" + moduleInfo.filterInfo().name(), ""); zk.createPersistent( "/hadoop-watershed/" + this.appIdStr + "/" + moduleInfo.filterInfo().name() + "/finish", ""); zk.createPersistent( "/hadoop-watershed/" + this.appIdStr + "/" + moduleInfo.filterInfo().name() + "/halted", ""); zk.subscribeChildChanges( "/hadoop-watershed/" + this.appIdStr + "/" + moduleInfo.filterInfo().name() + "/finish", createFinishListener(moduleInfo.filterInfo().name(), moduleInfo.numFilterInstances(), doneLatch)); for (int i = 0; i < moduleInfo.numFilterInstances(); i++) { this.numContainersToWaitFor++; ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); Logger.info("[AM] Making res-req for " + moduleInfo.filterInfo().name() + " " + i); rmClient.addContainerRequest(containerAsk); } } //TODO: process for starting the whole application //create containers // -> create instances // -> start output channels and filters // -> start input channels in reversed topological order (considering that there is no cycle) // * if there is cycle, then inicially start in any order //TODO "send" the start signal via ZooKeeper Logger.info("[AM] waiting for containers to finish"); try { doneLatch.await(); //await the input threads to finish } catch (InterruptedException e) { Logger.fatal(e.toString()); //e.printStackTrace(); } /*while(!doneWithContainers()) { Thread.sleep(50); }*/ zk.createPersistent("/hadoop-watershed/" + appIdStr + "/done", ""); Logger.info("[AM] unregisterApplicationMaster 0"); // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); Logger.info("[AM] unregisterApplicationMaster 1"); }
From source file:io.amient.yarn1.YarnClient.java
License:Open Source License
/** * This method should be called by the implementing application static main * method. It does all the work around creating a yarn application and * submitting the request to the yarn resource manager. The class given in * the appClass argument will be run inside the yarn-allocated master * container.//w w w . ja v a2 s .c om */ public static void submitApplicationMaster(Properties appConfig, Class<? extends YarnMaster> masterClass, String[] args, Boolean awaitCompletion) throws Exception { log.info("Yarn1 App Configuration:"); for (Object param : appConfig.keySet()) { log.info(param.toString() + " = " + appConfig.get(param).toString()); } String yarnConfigPath = appConfig.getProperty("yarn1.site", "/etc/hadoop"); String masterClassName = masterClass.getName(); appConfig.setProperty("yarn1.master.class", masterClassName); String applicationName = appConfig.getProperty("yarn1.application.name", masterClassName); log.info("--------------------------------------------------------------"); if (Boolean.valueOf(appConfig.getProperty("yarn1.local.mode", "false"))) { YarnMaster.run(appConfig, args); return; } int masterPriority = Integer.valueOf( appConfig.getProperty("yarn1.master.priority", String.valueOf(YarnMaster.DEFAULT_MASTER_PRIORITY))); int masterMemoryMb = Integer.valueOf(appConfig.getProperty("yarn1.master.memory.mb", String.valueOf(YarnMaster.DEFAULT_MASTER_MEMORY_MB))); int masterNumCores = Integer.valueOf( appConfig.getProperty("yarn1.master.num.cores", String.valueOf(YarnMaster.DEFAULT_MASTER_CORES))); String queue = appConfig.getProperty("yarn1.queue"); Configuration yarnConfig = new YarnConfiguration(); yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/core-site.xml")); yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/hdfs-site.xml")); yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/yarn-site.xml")); for (Map.Entry<Object, Object> entry : appConfig.entrySet()) { yarnConfig.set(entry.getKey().toString(), entry.getValue().toString()); } final org.apache.hadoop.yarn.client.api.YarnClient yarnClient = org.apache.hadoop.yarn.client.api.YarnClient .createYarnClient(); yarnClient.init(yarnConfig); yarnClient.start(); for (NodeReport report : yarnClient.getNodeReports(NodeState.RUNNING)) { log.debug("Node report:" + report.getNodeId() + " @ " + report.getHttpAddress() + " | " + report.getCapability()); } log.info("Submitting application master class " + masterClassName); YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); final ApplicationId appId = appResponse.getApplicationId(); if (appId == null) { System.exit(111); } else { appConfig.setProperty("am.timestamp", String.valueOf(appId.getClusterTimestamp())); appConfig.setProperty("am.id", String.valueOf(appId.getId())); } YarnClient.distributeResources(yarnConfig, appConfig, applicationName); String masterJvmArgs = appConfig.getProperty("yarn1.master.jvm.args", ""); YarnContainerContext masterContainer = new YarnContainerContext(yarnConfig, appConfig, masterJvmArgs, masterPriority, masterMemoryMb, masterNumCores, applicationName, YarnMaster.class, args); ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName(masterClassName); appContext.setResource(masterContainer.capability); appContext.setPriority(masterContainer.priority); appContext.setQueue(queue); appContext.setApplicationType(appConfig.getProperty("yarn1.application.type", "YARN")); appContext.setAMContainerSpec(masterContainer.createContainerLaunchContext()); log.info("Master container spec: " + masterContainer.capability); yarnClient.submitApplication(appContext); ApplicationReport report = yarnClient.getApplicationReport(appId); log.info("Tracking URL: " + report.getTrackingUrl()); if (awaitCompletion) { Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { if (!yarnClient.isInState(Service.STATE.STOPPED)) { log.info("Killing yarn application in shutdown hook"); try { yarnClient.killApplication(appId); } catch (Throwable e) { log.error("Failed to kill yarn application - please check YARN Resource Manager", e); } } } }); float lastProgress = -0.0f; while (true) { try { Thread.sleep(10000); report = yarnClient.getApplicationReport(appId); if (lastProgress != report.getProgress()) { lastProgress = report.getProgress(); log.info(report.getApplicationId() + " " + (report.getProgress() * 100.00) + "% " + (System.currentTimeMillis() - report.getStartTime()) + "(ms) " + report.getDiagnostics()); } if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.UNDEFINED)) { log.info(report.getApplicationId() + " " + report.getFinalApplicationStatus()); log.info("Tracking url: " + report.getTrackingUrl()); log.info("Finish time: " + ((System.currentTimeMillis() - report.getStartTime()) / 1000) + "(s)"); break; } } catch (Throwable e) { log.error("Master Heart Beat Error - terminating", e); yarnClient.killApplication(appId); Thread.sleep(2000); } } yarnClient.stop(); if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.SUCCEEDED)) { System.exit(112); } } yarnClient.stop(); }
From source file:io.amient.yarn1.YarnMaster.java
License:Open Source License
private void conclude() throws IOException, YarnException { if (localMode) { executor.shutdownNow();/* ww w . ja va 2 s .c o m*/ } else { rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); } }
From source file:io.hops.tensorflow.ApplicationMaster.java
License:Apache License
private boolean finish() { // wait for completion. finish if any container fails while (!done && !(numCompletedWorkers.get() == numWorkers) && !(numFailedContainers.get() > 0)) { if (numAllocatedContainers.get() != numTotalContainers) { long timeLeft = appMasterStartTime + allocationTimeout - System.currentTimeMillis(); LOG.info("Awaits container allocation, timeLeft=" + timeLeft); if (timeLeft < 0) { LOG.warn("Container allocation timeout. Finish application attempt"); break; }/*from w ww . j av a 2s . c om*/ } try { Thread.sleep(200); } catch (InterruptedException ex) { } } if (timelineHandler.isClientNotNull()) { timelineHandler.publishApplicationAttemptEvent(YarntfEvent.YARNTF_APP_ATTEMPT_END); } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmWrapper.getClient().stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numFailedContainers.get() == 0 && numCompletedWorkers.get() == numWorkers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed(workers)=" + numCompletedContainers.get() + "(" + numCompletedWorkers.get() + "), allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); LOG.info(appMessage); success = false; } try { rmWrapper.getClient().unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } rmWrapper.getClient().stop(); // Stop Timeline Client if (timelineHandler.isClientNotNull()) { timelineHandler.stopClient(); } return success; }
From source file:io.hops.tensorflow.Client.java
License:Apache License
/** * Monitor the submitted application for completion. * Kill application if time expires./* ww w . j av a 2 s . c o m*/ * * @param appId * Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ public boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { YarnApplicationState oldState = null; while (true) { // Check app status every 1 second. try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } ApplicationReport report = yarnClient.getApplicationReport(appId); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (oldState != state) { LOG.info("Got application report from ASM for" + "\n\t appId=" + appId.getId() + "\n\t clientToAMToken=" + report.getClientToAMToken() + "\n\t appDiagnostics=" + report.getDiagnostics() + "\n\t appMasterHost=" + report.getHost() + "\n\t appQueue=" + report.getQueue() + "\n\t appMasterRpcPort=" + report.getRpcPort() + "\n\t appStartTime=" + report.getStartTime() + "\n\t yarnAppState=" + report.getYarnApplicationState().toString() + "\n\t distributedFinalState=" + report.getFinalApplicationStatus().toString() + "\n\t appTrackingUrl=" + report.getTrackingUrl() + "\n\t appUser=" + report.getUser()); oldState = state; } else { LOG.info("Got application report from ASM for " + appId + " (state: " + state + ")"); } if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:ml.shifu.guagua.yarn.GuaguaAppMaster.java
License:Apache License
/** * Call when the application is done//from ww w . j a v a 2 s . co m * * @return if all containers succeed */ private boolean finish() { // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); getNmClientAsync().stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (getSuccessfulCount().get() == getContainersToLaunch()) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(), getCompletedCount().get(), getFailedCount().get()); success = false; } try { getAmRMClient().unregisterApplicationMaster(appStatus, appMessage, this.appMasterTrackingUrl); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } getAmRMClient().stop(); return success; }