List of usage examples for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.FAILED
FinalApplicationStatus FAILED
To view the source code for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.FAILED, click the Source Link.
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
private void run() throws Exception { heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); nmClient = NMClient.createNMClient(); nmClient.init(conf);//from w ww . ja v a 2s. co m nmClient.start(); nmClient.cleanupRunningContainersOnStop(true); // Register with ResourceManager String url = "http://" + applicationMasterHost + ":" + jobManagerWebPort; LOG.info("Registering ApplicationMaster with tracking url " + url); rmClient.registerApplicationMaster(applicationMasterHost, 0, url); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource flinkJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); // register Flink Jar with remote HDFS final Path remoteJarPath = new Path(remoteFlinkJarPath); Utils.registerLocalResource(fs, remoteJarPath, flinkJar); // register conf with local fs. 
Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/flink-conf-modified.yaml"), flinkConf, new Path(clientHomeDir)); LOG.info("Prepared local resource for modified yaml: " + flinkConf); hasLogback = new File(currDir + "/logback.xml").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // copy resources to the TaskManagers. taskManagerLocalResources = new HashMap<String, LocalResource>(2); taskManagerLocalResources.put("flink.jar", flinkJar); taskManagerLocalResources.put("flink-conf.yaml", flinkConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { taskManagerLocalResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } completedContainers = 0; // Obtain allocated containers and launch StringBuffer containerDiag = new StringBuffer(); // diagnostics log for the containers. allocateOutstandingContainer(containerDiag); LOG.info("Allocated all initial containers"); // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". 
Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); logDeadContainer(status, containerDiag); } Thread.sleep(5000); } if (isClosed) { return; } // Un-register with ResourceManager final String diagnosticsMessage = "Application Master shut down after all " + "containers finished\n" + containerDiag.toString(); LOG.info("Diagnostics message: " + diagnosticsMessage); rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, diagnosticsMessage, ""); this.close(); amRpcServer.stop(); // we need to manually stop the RPC service. Usually, the Client stops the RPC, // but at this point, the AM has been shut down (for some reason). LOG.info("Application Master shutdown completed."); }
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
@Override public BooleanValue shutdownAM() throws Exception { LOG.info("Client requested shutdown of AM"); FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; String finalMessage = ""; if (isFailed) { finalStatus = FinalApplicationStatus.FAILED; finalMessage = "Application Master failed"; isFailed = false; // allow a proper shutdown isFailed.notifyAll();/*from w w w . ja va 2 s. co m*/ } rmClient.unregisterApplicationMaster(finalStatus, finalMessage, ""); this.close(); return new BooleanValue(true); }
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
public static void main(String[] args) throws Exception { // execute Application Master using the client's user final String yarnClientUsername = System.getenv(Client.ENV_CLIENT_USERNAME); LOG.info("YARN daemon runs as '" + UserGroupInformation.getCurrentUser().getShortUserName() + "' setting" + " user to execute Flink ApplicationMaster/JobManager to '" + yarnClientUsername + "'"); UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername); for (Token<? extends TokenIdentifier> toks : UserGroupInformation.getCurrentUser().getTokens()) { ugi.addToken(toks);//from www . ja v a 2s .com } ugi.doAs(new PrivilegedAction<Object>() { @Override public Object run() { AMRMClient<ContainerRequest> rmClient = null; ApplicationMaster am = null; try { Configuration conf = Utils.initializeYarnConfiguration(); rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); // run the actual Application Master am = new ApplicationMaster(conf); am.generateConfigurationFile(); am.startJobManager(); am.setRMClient(rmClient); am.run(); } catch (Throwable e) { LOG.error("Error while running the application master", e); // the AM is not available. Report error through the unregister function. if (rmClient != null && am == null) { try { rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, "Flink YARN Application master" + " stopped unexpectedly with an exception.\n" + StringUtils.stringifyException(e), ""); } catch (Exception e1) { LOG.error("Unable to fail the application master", e1); } LOG.info("AM unregistered from RM"); return null; } if (rmClient == null) { LOG.error("Unable to unregister AM since the RM client is not available"); } if (am != null) { LOG.info("Writing error into internal message system"); am.setFailed(true); am.addMessage(new Message("The application master failed with an exception:\n" + StringUtils.stringifyException(e))); am.keepRPCAlive(); } } return null; } }); }
From source file:org.apache.flink.yarn.FlinkYarnCluster.java
License:Apache License
/** * Shutdown the YARN cluster./* w w w .ja v a2 s . c o m*/ * @param failApplication whether we should fail the YARN application (in case of errors in Flink) */ @Override public void shutdown(boolean failApplication) { if (!isConnected) { throw new IllegalStateException("The cluster has been connected to the ApplicationMaster."); } if (hasBeenShutDown.getAndSet(true)) { return; } try { Runtime.getRuntime().removeShutdownHook(clientShutdownHook); } catch (IllegalStateException e) { // we are already in the shutdown hook } if (actorSystem != null) { LOG.info("Sending shutdown request to the Application Master"); if (applicationClient != ActorRef.noSender()) { try { FinalApplicationStatus finalStatus; if (failApplication) { finalStatus = FinalApplicationStatus.FAILED; } else { finalStatus = FinalApplicationStatus.SUCCEEDED; } Future<Object> response = Patterns.ask(applicationClient, new YarnMessages.LocalStopYarnSession( finalStatus, "Flink YARN Client requested shutdown"), new Timeout(akkaDuration)); Await.ready(response, akkaDuration); } catch (Exception e) { LOG.warn("Error while stopping YARN Application Client", e); } } actorSystem.shutdown(); actorSystem.awaitTermination(); actorSystem = null; } LOG.info("Deleting files in " + sessionFilesDir); try { FileSystem shutFS = FileSystem.get(hadoopConfig); shutFS.delete(sessionFilesDir, true); // delete conf and jar file. 
shutFS.close(); } catch (IOException e) { LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e); } try { actorRunner.join(1000); // wait for 1 second } catch (InterruptedException e) { LOG.warn("Shutdown of the actor runner was interrupted", e); Thread.currentThread().interrupt(); } try { pollingRunner.stopRunner(); pollingRunner.join(1000); } catch (InterruptedException e) { LOG.warn("Shutdown of the polling runner was interrupted", e); Thread.currentThread().interrupt(); } LOG.info("YARN Client is shutting down"); yarnClient.stop(); // actorRunner is using the yarnClient. yarnClient = null; // set null to clearly see if somebody wants to access it afterwards. }
From source file:org.apache.flink.yarn.YarnFlinkResourceManager.java
License:Apache License
/**
 * Maps a Flink application status onto the YARN final application status.
 *
 * <p>{@code SUCCEEDED} and {@code FAILED} map to their namesakes, {@code CANCELED}
 * maps to {@code KILLED}; {@code null} and every other value map to {@code UNDEFINED}.
 *
 * @param status The Flink application status, may be {@code null}.
 * @return The corresponding YARN application status.
 */
private FinalApplicationStatus getYarnStatus(ApplicationStatus status) {
    // A missing Flink status has no YARN counterpart.
    if (status == null) {
        return FinalApplicationStatus.UNDEFINED;
    }

    final FinalApplicationStatus yarnStatus;
    switch (status) {
        case SUCCEEDED:
            yarnStatus = FinalApplicationStatus.SUCCEEDED;
            break;
        case FAILED:
            yarnStatus = FinalApplicationStatus.FAILED;
            break;
        case CANCELED:
            yarnStatus = FinalApplicationStatus.KILLED;
            break;
        default:
            yarnStatus = FinalApplicationStatus.UNDEFINED;
            break;
    }
    return yarnStatus;
}
From source file:org.apache.giraph.yarn.GiraphApplicationMaster.java
License:Apache License
/** * Call when the application is done//from ww w.j a v a2 s. c o m * @return if all containers succeed */ private boolean finish() { // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (failedCount.get() == 0 && completedCount.get() == containersToLaunch) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + containersToLaunch + ", completed=" + completedCount.get() + ", failed=" + failedCount.get(); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); return success; }
From source file:org.apache.hama.bsp.BSPApplicationMaster.java
License:Apache License
/**
 * Stops the sync, client and task servers, shuts the thread pool down if it is still
 * active, and sends the final application status derived from the job state to the
 * ResourceManager.
 *
 * <p>{@code SUCCESS} is reported as {@code SUCCEEDED} and {@code KILLED} as
 * {@code KILLED}; every other job state is reported as {@code FAILED}.
 *
 * @throws YarnException declared for the ResourceManager call
 * @throws IOException declared for the ResourceManager call
 */
private void cleanup() throws YarnException, IOException {
    syncServer.stop();

    if (threadPool != null && !threadPool.isShutdown()) {
        threadPool.shutdownNow();
    }

    clientServer.stop();
    taskServer.stop();

    FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);

    // Resolve the YARN status first, then set it on the request exactly once.
    final FinalApplicationStatus finalStatus;
    switch (job.getState()) {
        case SUCCESS:
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            break;
        case KILLED:
            finalStatus = FinalApplicationStatus.KILLED;
            break;
        case FAILED:
        default:
            finalStatus = FinalApplicationStatus.FAILED;
            break;
    }
    finishReq.setFinalApplicationStatus(finalStatus);

    this.amrmRPC.finishApplicationMaster(finishReq);
}
From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java
License:Apache License
/** * shut down the cluster /* w w w .jav a 2s . co m*/ */ private synchronized void finish() { FinalApplicationStatus appStatus; log.info("Triggering shutdown of the AM: {}", amCompletionReason); String appMessage = amCompletionReason; //stop the daemon & grab its exit code int exitCode = amExitCode; success = exitCode == 0 || exitCode == 3; appStatus = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED; if (!spawnedProcessExitedBeforeShutdownTriggered) { //stopped the forked process but don't worry about its exit code exitCode = stopForkedProcess(); log.debug("Stopped forked process: exit code={}", exitCode); } //stop any launches in progress launchService.stop(); //now release all containers releaseAllContainers(); // When the application completes, it should send a finish application // signal to the RM log.info("Application completed. Signalling finish to RM"); //if there were failed containers and the app isn't already down as failing, it is now int failedContainerCount = appState.getFailedCountainerCount(); if (failedContainerCount != 0 && appStatus == FinalApplicationStatus.SUCCEEDED) { appStatus = FinalApplicationStatus.FAILED; appMessage = "Completed with exit code = " + exitCode + " - " + getContainerDiagnosticInfo(); success = false; } try { log.info("Unregistering AM status={} message={}", appStatus, appMessage); asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException e) { log.info("Failed to unregister application: " + e, e); } catch (IOException e) { log.info("Failed to unregister application: " + e, e); } }
From source file:org.apache.ignite.yarn.ApplicationMaster.java
License:Apache License
/** * Runs application master.//from w w w . j ava 2 s . c om * * @throws Exception If failed. */ public void run() throws Exception { // Register with ResourceManager rmClient.registerApplicationMaster("", 0, ""); log.log(Level.INFO, "Application master registered."); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); try { // Check ignite cluster. while (!nmClient.isInState(Service.STATE.STOPPED)) { int runningCnt = containers.size(); if (runningCnt < props.instances() && checkAvailableResource()) { // Resource requirements for worker containers. Resource capability = Records.newRecord(Resource.class); capability.setMemory((int) props.totalMemoryPerNode()); capability.setVirtualCores((int) props.cpusPerNode()); for (int i = 0; i < props.instances() - runningCnt; ++i) { // Make container requests to ResourceManager AMRMClient.ContainerRequest containerAsk = new AMRMClient.ContainerRequest(capability, null, null, priority); rmClient.addContainerRequest(containerAsk); log.log(Level.INFO, "Making request. Memory: {0}, cpu {1}.", new Object[] { props.totalMemoryPerNode(), props.cpusPerNode() }); } } TimeUnit.MILLISECONDS.sleep(schedulerTimeout); } } catch (InterruptedException ignored) { // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.KILLED, "", ""); log.log(Level.WARNING, "Application master killed."); } catch (Exception e) { // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, "", ""); log.log(Level.SEVERE, "Application master failed.", e); } }
From source file:org.apache.oozie.action.hadoop.TestLauncherAMCallbackNotifier.java
License:Apache License
/**
 * Checks that notifying the callback URL with a FAILED action result produces a
 * callback carrying the string form of {@code FinalApplicationStatus.FAILED}.
 *
 * @throws Exception if setting up the embedded container or waiting for the callback fails
 */
public void testNotifyBackgroundActionWhenSubmitFailsWithFailed() throws Exception {
    final Configuration callbackConf =
            setupEmbeddedContainer(QueryServlet.class, "/count/*", "/count/?status=$jobStatus", null);
    final LauncherAMCallbackNotifier notifier = new LauncherAMCallbackNotifier(callbackConf);

    // nothing may have been recorded before the notification is sent
    assertNull(QueryServlet.lastQueryString);

    notifier.notifyURL(OozieActionResult.FAILED);

    final String expectedStatus = FinalApplicationStatus.FAILED.toString();
    waitForCallbackAndCheckResult(expectedStatus);
}