List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED
FinalApplicationStatus SUCCEEDED
To view the source code for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED, click the Source Link.
From source file:com.github.hdl.tensorflow.yarn.app.Client.java
License:Apache License
/** * Monitor the submitted application for completion. * @param appId Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException//from www .jav a 2 s.c o m * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } ApplicationReport report = yarnClient.getApplicationReport(appId); // LOG.info("Got application report from ASM for" // + ", appId=" + appId.getId() // + ", clientToAMToken=" + report.getClientToAMToken() // + ", appDiagnostics=" + report.getDiagnostics() // + ", appMasterHost=" + report.getHost() // + ", appQueue=" + report.getQueue() // + ", appMasterRpcPort=" + report.getRpcPort() // + ", appStartTime=" + report.getStartTime() // + ", yarnAppState=" + report.getYarnApplicationState().toString() // + ", tfAppFinalState=" + report.getFinalApplicationStatus().toString() // + ", appTrackingUrl=" + report.getTrackingUrl() // + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus tfStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.RUNNING == state) { if (appRpc == null) { String hostname = report.getHost(); int port = report.getRpcPort(); LOG.info("application master rpc host: " + hostname + "; port: " + port); appRpc = new TFApplicationRpcClient(hostname, port).getRpc(); } if (appRpc != null && isEmptyString(clusterSpecJsonString)) { clusterSpecJsonString = appRpc.getClusterSpec(); LOG.info("cluster spec is " + clusterSpecJsonString); if (!isEmptyString(clusterSpecJsonString)) { TFClient tfClient = new TFClient(tfClientPy); if (isEnableTensorBoard) { Thread tensorBoardThread = new Thread() { @Override public void run() { tfClient.startTensorBoardClient(tensorboardEventDir); } }; tensorBoardThread.start(); 
LOG.info("Launching tensorboard ..."); try { Thread.sleep(3000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } if (tensorBoardThread.isAlive()) { LOG.info("the tensorboard launched successfully on the localhost:6006"); } else { LOG.info("the tensorboard launched failed"); } } tfClient.startTensorflowClient(clusterSpecJsonString); } } } if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == tfStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", tfAppFinalState=" + tfStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", tfAppFinalState=" + tfStatus.toString() + ". Breaking monitoring loop"); return false; } } }
From source file:com.gpiskas.yarn.AppMaster.java
License:Open Source License
public void run() throws Exception { conf = new YarnConfiguration(); // Create NM Client nmClient = NMClient.createNMClient(); nmClient.init(conf);/*from w w w . j av a 2s .c o m*/ nmClient.start(); // Create AM - RM Client AMRMClientAsync<ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(1000, this); rmClient.init(conf); rmClient.start(); // Register with RM rmClient.registerApplicationMaster("", 0, ""); System.out.println("AppMaster: Registered"); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(128); capability.setVirtualCores(1); // Reqiest Containers from RM System.out.println("AppMaster: Requesting " + containerCount + " Containers"); for (int i = 0; i < containerCount; ++i) { rmClient.addContainerRequest(new ContainerRequest(capability, null, null, priority)); } while (!containersFinished()) { Thread.sleep(100); } System.out.println("AppMaster: Unregistered"); rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); }
From source file:com.ibm.bi.dml.yarn.DMLAppMaster.java
License:Open Source License
/** * //from w w w. j av a 2 s . c o m * @param args * @throws YarnException * @throws IOException */ public void runApplicationMaster(String[] args) throws YarnException, IOException { _conf = new YarnConfiguration(); //obtain application ID String containerIdString = System.getenv(Environment.CONTAINER_ID.name()); ContainerId containerId = ConverterUtils.toContainerId(containerIdString); _appId = containerId.getApplicationAttemptId().getApplicationId(); LOG.info("SystemML appplication master (applicationID: " + _appId + ")"); //initialize clients to ResourceManager AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(_conf); rmClient.start(); //register with ResourceManager rmClient.registerApplicationMaster("", 0, ""); //host, port for rm communication LOG.debug("Registered the SystemML application master with resource manager"); //start status reporter to ResourceManager DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000); reporter.start(); LOG.debug("Started status reporter (heartbeat to resource manager)"); //set DMLscript app master context DMLScript.setActiveAM(); //parse input arguments String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs(); //run SystemML CP FinalApplicationStatus status = null; try { //core dml script execution (equivalent to non-AM runtime) boolean success = DMLScript.executeScript(_conf, otherArgs); if (success) status = FinalApplicationStatus.SUCCEEDED; else status = FinalApplicationStatus.FAILED; } catch (DMLScriptException ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage()); status = FinalApplicationStatus.FAILED; writeMessageToHDFSWorkingDir(ex.getMessage()); } catch (Exception ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex); status = FinalApplicationStatus.FAILED; } finally { //stop periodic status reports reporter.stopStatusReporter(); 
LOG.debug("Stopped status reporter"); //unregister resource manager client rmClient.unregisterApplicationMaster(status, "", ""); LOG.debug("Unregistered the SystemML application master"); } }
From source file:com.ibm.bi.dml.yarn.DMLYarnClient.java
License:Open Source License
/** * Method to launch the dml yarn app master and execute the given dml script * with the given configuration and jar file. * /*ww w .ja v a2 s .co m*/ * NOTE: on launching the yarn app master, we do not explicitly probe if we * are running on a yarn or MR1 cluster. In case of MR1, already the class * YarnConfiguration will not be found and raise a classnotfound. In case of any * exception we fall back to run CP directly in the client process. * * @return true if dml program successfully executed as yarn app master * @throws IOException */ protected boolean launchDMLYarnAppmaster() throws IOException, DMLScriptException { boolean ret = false; String hdfsWD = null; try { Timing time = new Timing(true); // load yarn configuration YarnConfiguration yconf = new YarnConfiguration(); // create yarn client YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(yconf); yarnClient.start(); // create application and get the ApplicationID YarnClientApplication app = yarnClient.createApplication(); ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); LOG.debug("Created application (applicationID: " + appId + ")"); // prepare hdfs working directory via ApplicationID // copy script, config, jar file to hdfs hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(_dmlConfig, appId); copyResourcesToHdfsWorkingDir(yconf, hdfsWD); //construct command line argument String command = constructAMCommand(_args, _dmlConfig); LOG.debug("Constructed application master command: \n" + command); // set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands(Collections.singletonList(command)); amContainer.setLocalResources(constructLocalResourceMap(yconf)); amContainer.setEnvironment(constructEnvionmentMap(yconf)); // Set up resource type requirements for ApplicationMaster int memHeap = 
_dmlConfig.getIntValue(DMLConfig.YARN_APPMASTERMEM); int memAlloc = (int) computeMemoryAllocation(memHeap); Resource capability = Records.newRecord(Resource.class); capability.setMemory(memAlloc); capability.setVirtualCores(NUM_CORES); LOG.debug("Requested application resources: memory=" + memAlloc + ", vcores=" + NUM_CORES); // Finally, set-up ApplicationSubmissionContext for the application String qname = _dmlConfig.getTextValue(DMLConfig.YARN_APPQUEUE); appContext.setApplicationName(APPMASTER_NAME); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue(qname); // queue LOG.debug("Configured application meta data: name=" + APPMASTER_NAME + ", queue=" + qname); // submit application (non-blocking) yarnClient.submitApplication(appContext); // Check application status periodically (and output web ui address) ApplicationReport appReport = yarnClient.getApplicationReport(appId); LOG.info("Application tracking-URL: " + appReport.getTrackingUrl()); YarnApplicationState appState = appReport.getYarnApplicationState(); YarnApplicationState oldState = appState; LOG.info("Application state: " + appState); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { Thread.sleep(APP_STATE_INTERVAL); //wait for 200ms appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); if (appState != oldState) { oldState = appState; LOG.info("Application state: " + appState); } } //check final status (failed or succeeded) FinalApplicationStatus finalState = appReport.getFinalApplicationStatus(); LOG.info("Application final status: " + finalState); //show application and total runtime double appRuntime = (double) (appReport.getFinishTime() - appReport.getStartTime()) / 1000; LOG.info("Application runtime: " + appRuntime + " sec."); LOG.info("Total runtime: " + String.format("%.3f", time.stop() / 1000) + " 
sec."); //raised script-level error in case of failed final status if (finalState != FinalApplicationStatus.SUCCEEDED) { //propagate script-level stop call message String stop_msg = readMessageToHDFSWorkingDir(_dmlConfig, yconf, appId); if (stop_msg != null) throw new DMLScriptException(stop_msg); //generic failure message throw new DMLRuntimeException( "DML yarn app master finished with final status: " + finalState + "."); } ret = true; } catch (DMLScriptException ex) { //rethrow DMLScriptException to propagate stop call throw ex; } catch (Exception ex) { LOG.error("Failed to run DML yarn app master.", ex); ret = false; } finally { //cleanup working directory if (hdfsWD != null) MapReduceTool.deleteFileIfExistOnHDFS(hdfsWD); } return ret; }
From source file:com.inforefiner.hdata.ApplicationMaster.java
License:Apache License
@VisibleForTesting protected boolean finish() { // wait for completion. while (!done && (numCompletedContainers.get() != numTotalContainers)) { try {//www.j a v a 2s.c o m Thread.sleep(200); } catch (InterruptedException ex) { } } if (timelineClient != null) { publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END, domainId, appSubmitterUgi); } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); LOG.info(appMessage); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); // Stop Timeline Client if (timelineClient != null) { timelineClient.stop(); } return success; }
From source file:com.inforefiner.hdata.SubmitClient.java
License:Apache License
/** * Monitor the submitted application for completion. * Kill application if time expires./* w ww .j ava 2 s . co m*/ * * @param appId Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { // Check app status every 1 second. try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } // Get application report for the appId we are interested in ApplicationReport report = yarnClient.getApplicationReport(appId); // // List<ContainerReport> containerReports = yarnClient.getContainers(report.getCurrentApplicationAttemptId()); // for (ContainerReport containerReport : containerReports) { // String logUrl = containerReport.getLogUrl(); // LOG.info("Container log url = " + logUrl + ", host = "); // } // LOG.info("Got application report from ASM for" // + ", appId=" + appId.getId() // + ", clientToAMToken=" + report.getClientToAMToken() // + ", appDiagnostics=" + report.getDiagnostics() // + ", appMasterHost=" + report.getHost() // + ", appQueue=" + report.getQueue() // + ", appMasterRpcPort=" + report.getRpcPort() // + ", appStartTime=" + report.getStartTime() // + ", yarnAppState=" + report.getYarnApplicationState().toString() // + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() // + ", appTrackingUrl=" + report.getTrackingUrl() // + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." 
+ " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:com.scistor.dshell.ScistorClient.java
License:Apache License
/** * Monitor the submitted application for completion. Kill application if * time expires.//from ww w . j a va2 s . co m * * @param appId * Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { // Check app status every 2 second. try { Thread.sleep(2000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } // Get application report for the appId we are interested in ApplicationReport report = yarnClient.getApplicationReport(appId); LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". 
Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:com.sogou.dockeronyarn.client.DockerApplicationMaster_23.java
License:Apache License
private void finish() { // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try {/*from www. java 2 s . com*/ launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stopContainer all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a shutdown application // signal to the RM LOG.info("Application completed. Signalling shutdown to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); }
From source file:com.sogou.dockeronyarn.client.DockerClient.java
License:Apache License
/** * Monitor the submitted application for completion. * Kill application if time expires. //from ww w . jav a 2s .co m * @param appId Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { // Check app status every 1 second. try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } // Get application report for the appId we are interested in ApplicationReport report = yarnClient.getApplicationReport(appId); LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not shutdown." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". 
Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:com.sogou.dockeronyarn.service.DockerApplicationMaster_24.java
License:Apache License
@VisibleForTesting protected boolean finish() { // wait for completion. while (!done && (numCompletedContainers.get() != numTotalContainers)) { try {/*from w ww . j av a 2s . c o m*/ Thread.sleep(200); } catch (InterruptedException ex) { } } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stopContainer all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a shutdown application // signal to the RM LOG.info("Application completed. Signalling shutdown to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); return success; }