List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED
FinalApplicationStatus FAILED
To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED, click the Source Link.
From source file:com.zqh.hadoop.moya.core.yarn.ApplicationMaster.java
License:Apache License
private void finish() { // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try {//from w ww .ja v a 2 s . c o m launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); //TODO Remove MOYA NODE try { DeleteGroup.main(new String[] { ZKHosts, "moya" }); } catch (Exception e1) { e1.printStackTrace(); } FinalApplicationStatus appStatus; String appMessage = null; success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { resourceManager.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } done = true; resourceManager.stop(); }
From source file:de.huberlin.wbi.hiway.am.HiWay.java
License:Apache License
private void finish() { writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_TIME, Long.toString(System.currentTimeMillis() - amRMClient.getStartTime()))); Collection<Data> outputFiles = getOutputFiles(); if (outputFiles.size() > 0) { String outputs = getOutputFiles().toString(); writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_OUTPUT, outputs.substring(1, outputs.length() - 1))); }/*from ww w . j ava 2 s. c om*/ // Join all launched threads needed for when we time out and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { System.err.println("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); System.exit(-1); } } // When the application completes, it should stop all running containers System.out.println("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application signal to the RM System.out.println("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; System.out.println("Failed Containers: " + numFailedContainers.get()); System.out.println("Completed Containers: " + numCompletedContainers.get()); int numTotalContainers = scheduler.getNumberOfTotalTasks(); System.out.println("Total Scheduled Containers: " + numTotalContainers); if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." 
+ ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get() + ", killed=" + numKilledContainers.get(); success = false; } try { statLog.close(); federatedReport.stageOut(); if (summaryPath != null) { String stdout = hdfsApplicationDirectory + "/AppMaster.stdout"; String stderr = hdfsApplicationDirectory + "/AppMaster.stderr"; String statlog = hdfsApplicationDirectory + "/" + appId + ".log"; try (BufferedWriter writer = new BufferedWriter(new FileWriter(summaryPath.toString()))) { Collection<String> output = new ArrayList<>(); for (Data outputFile : getOutputFiles()) { output.add(outputFile.getHdfsPath().toString()); } JSONObject obj = new JSONObject(); try { obj.put("output", output); obj.put("stdout", stdout); obj.put("stderr", stderr); obj.put("statlog", statlog); } catch (JSONException e) { e.printStackTrace(); System.exit(-1); } writer.write(obj.toString()); } new Data("AppMaster.stdout").stageOut(); new Data("AppMaster.stderr").stageOut(); new Data(summaryPath).stageOut(); } } catch (IOException e) { System.err.println("Error when attempting to stage out federated output log."); e.printStackTrace(); System.exit(-1); } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException | IOException e) { System.err.println("Failed to unregister application"); e.printStackTrace(); System.exit(-1); } amRMClient.stop(); }
From source file:de.huberlin.wbi.hiway.am.WorkflowDriver.java
License:Apache License
protected void finish() { /* log */ logger.writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_TIME, Long.toString(System.currentTimeMillis() - amRMClient.getStartTime()))); // Join all launched threads needed for when we time out and we need to release containers for (Thread launchThread : launchThreads) { try {/* w w w . ja v a 2s . c om*/ launchThread.join(10000); } catch (InterruptedException e) { Logger.writeToStdout("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(System.out); System.exit(-1); } } // When the application completes, it should stop all running containers Logger.writeToStdout("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application signal to the RM Logger.writeToStdout("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; WorkflowDriver.Logger.writeToStdout("Failed Containers: " + logger.numFailedContainers.get()); WorkflowDriver.Logger.writeToStdout("Completed Containers: " + logger.numCompletedContainers.get()); int numTotalContainers = scheduler.getNumberOfTotalTasks(); // WorkflowDriver.writeToStdout("Total Scheduled Containers: " + numTotalContainers); if (logger.getNumFailedContainers().get() == 0 && logger.getNumCompletedContainers().get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." 
+ ", total=" + numTotalContainers + ", completed=" + logger.getNumCompletedContainers().get() + ", allocated=" + logger.getNumAllocatedContainers().get() + ", failed=" + logger.getNumFailedContainers().get() + ", killed=" + logger.getNumKilledContainers().get(); success = false; } Collection<String> output = getOutput(); Collection<Data> outputFiles = getOutputFiles(); if (outputFiles.size() > 0) { String outputs = outputFiles.toString(); logger.writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_OUTPUT, outputs.substring(1, outputs.length() - 1))); } try { logger.statLog.close(); logger.federatedReport.stageOut(); if (summaryPath != null) { String stdout = hdfsApplicationDirectory + "/AppMaster.stdout"; String stderr = hdfsApplicationDirectory + "/AppMaster.stderr"; String statlog = hdfsApplicationDirectory + "/" + appId + ".log"; try (BufferedWriter writer = new BufferedWriter(new FileWriter(summaryPath.toString()))) { JSONObject obj = new JSONObject(); try { obj.put("output", output); obj.put("stdout", stdout); obj.put("stderr", stderr); obj.put("statlog", statlog); } catch (JSONException e) { e.printStackTrace(System.out); System.exit(-1); } writer.write(obj.toString()); } new Data("AppMaster.stdout").stageOut(); new Data("AppMaster.stderr").stageOut(); new Data(summaryPath).stageOut(); } } catch (IOException e) { Logger.writeToStdout("Error when attempting to stage out federated output log."); e.printStackTrace(System.out); System.exit(-1); } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException | IOException e) { Logger.writeToStdout("Failed to unregister application"); e.printStackTrace(System.out); System.exit(-1); } amRMClient.stop(); if (timelineClient != null) timelineClient.stop(); }
From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java
License:Apache License
private void finish() { // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try {// w w w . j a v a2 s . c o m launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); }
From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/** * Clean up, whether or not we were successful. *//* www. jav a2s . c o m*/ private void finish() { // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); //from https://stackoverflow.com/questions/4812570/how-to-store-printstacktrace-into-a-string StringWriter errors = new StringWriter(); e.printStackTrace(new PrintWriter(errors)); LOG.error(errors.toString()); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; success = true; if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); success = false; } try { resourceManager.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } done = true; resourceManager.stop(); }
From source file:gobblin.yarn.GobblinYarnAppLauncher.java
License:Apache License
@Subscribe public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) { ApplicationReport applicationReport = applicationReportArrivalEvent.getApplicationReport(); YarnApplicationState appState = applicationReport.getYarnApplicationState(); LOGGER.info("Gobblin Yarn application state: " + appState.toString()); // Reset the count on failures to get the ApplicationReport when there's one success this.getApplicationReportFailureCount.set(0); if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) { applicationCompleted = true;/*w w w. ja va 2 s .c om*/ LOGGER.info("Gobblin Yarn application finished with final status: " + applicationReport.getFinalApplicationStatus().toString()); if (applicationReport.getFinalApplicationStatus() == FinalApplicationStatus.FAILED) { LOGGER.error("Gobblin Yarn application failed for the following reason: " + applicationReport.getDiagnostics()); } try { GobblinYarnAppLauncher.this.stop(); } catch (IOException ioe) { LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe); } catch (TimeoutException te) { LOGGER.error("Timeout in stopping the service manager", te); } finally { if (this.emailNotificationOnShutdown) { sendEmailOnShutdown(Optional.of(applicationReport)); } } } }
From source file:husky.server.HuskyApplicationMaster.java
License:Apache License
private void run() throws YarnException, IOException, InterruptedException, ExecutionException { LOG.info("Run App Master"); mRMClientListener = new HuskyRMCallbackHandler(this); mRMClient = AMRMClientAsync.createAMRMClientAsync(1000, mRMClientListener); mRMClient.init(mYarnConf);// ww w .j a v a 2 s. c o m mRMClient.start(); mContainerListener = new HuskyNMCallbackHandler(); mNMClient = NMClientAsync.createNMClientAsync(mContainerListener); mNMClient.init(mYarnConf); mNMClient.start(); // Register with ResourceManager LOG.info("registerApplicationMaster started"); mRMClient.registerApplicationMaster("", 0, ""); LOG.info("registerApplicationMaster done"); // Ask RM to start `mNumContainer` containers, each is a worker node LOG.info("Ask RM for " + mWorkerInfos.size() + " containers"); for (Pair<String, Integer> i : mWorkerInfos) { mRMClient.addContainerRequest(setupContainerAskForRMSpecific(i.getFirst())); } FinalApplicationStatus status = mRMClientListener.getFinalNumSuccess() == mWorkerInfos.size() ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED; mRMClient.unregisterApplicationMaster(status, mRMClientListener.getStatusReport(), null); }
From source file:io.hops.tensorflow.ApplicationMaster.java
License:Apache License
private boolean finish() { // wait for completion. finish if any container fails while (!done && !(numCompletedWorkers.get() == numWorkers) && !(numFailedContainers.get() > 0)) { if (numAllocatedContainers.get() != numTotalContainers) { long timeLeft = appMasterStartTime + allocationTimeout - System.currentTimeMillis(); LOG.info("Awaits container allocation, timeLeft=" + timeLeft); if (timeLeft < 0) { LOG.warn("Container allocation timeout. Finish application attempt"); break; }/*from w w w . j av a 2s. com*/ } try { Thread.sleep(200); } catch (InterruptedException ex) { } } if (timelineHandler.isClientNotNull()) { timelineHandler.publishApplicationAttemptEvent(YarntfEvent.YARNTF_APP_ATTEMPT_END); } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmWrapper.getClient().stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numFailedContainers.get() == 0 && numCompletedWorkers.get() == numWorkers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." 
+ ", total=" + numTotalContainers + ", completed(workers)=" + numCompletedContainers.get() + "(" + numCompletedWorkers.get() + "), allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); LOG.info(appMessage); success = false; } try { rmWrapper.getClient().unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } rmWrapper.getClient().stop(); // Stop Timeline Client if (timelineHandler.isClientNotNull()) { timelineHandler.stopClient(); } return success; }
From source file:ml.shifu.guagua.yarn.GuaguaAppMaster.java
License:Apache License
/** * Call when the application is done//ww w . java2s . co m * * @return if all containers succeed */ private boolean finish() { // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); getNmClientAsync().stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (getSuccessfulCount().get() == getContainersToLaunch()) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(), getCompletedCount().get(), getFailedCount().get()); success = false; } try { getAmRMClient().unregisterApplicationMaster(appStatus, appMessage, this.appMasterTrackingUrl); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } getAmRMClient().stop(); return success; }
From source file:org.apache.drill.yarn.appMaster.AMYarnFacadeImpl.java
License:Apache License
@Override public void finish(boolean succeeded, String msg) throws YarnFacadeException { // Stop the Node Manager client. nodeMgr.stop();//from w w w . jav a 2s .c om // Deregister the app from YARN. String appMsg = "Drill Cluster Shut-Down"; FinalApplicationStatus status = FinalApplicationStatus.SUCCEEDED; if (!succeeded) { appMsg = "Drill Cluster Fatal Error - check logs"; status = FinalApplicationStatus.FAILED; } if (msg != null) { appMsg = msg; } try { resourceMgr.unregisterApplicationMaster(status, appMsg, ""); } catch (YarnException | IOException e) { throw new YarnFacadeException("Deregister AM failed", e); } // Stop the Resource Manager client resourceMgr.stop(); }