List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED
FinalApplicationStatus SUCCEEDED
To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED.
Click Source Link
From source file:com.continuuity.weave.internal.yarn.Hadoop21YarnAMClient.java
License:Apache License
@Override protected void shutDown() throws Exception { nmClient.stopAndWait();//ww w . j a va 2 s . c o m amrmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, trackerUrl.toString()); amrmClient.stop(); }
From source file:com.continuuity.weave.yarn.ApplicationMasterService.java
License:Open Source License
private void doStop() throws Exception { Thread.interrupted(); // This is just to clear the interrupt flag LOG.info("Stop application master with spec: " + WeaveSpecificationAdapter.create().toJson(weaveSpec)); Set<ContainerId> ids = Sets.newHashSet(runningContainers.getContainerIds()); runningContainers.stopAll();//from w w w .j a v a 2 s . c om int count = 0; while (!ids.isEmpty() && count++ < 5) { AllocateResponse allocateResponse = amrmClient.allocate(0.0f); for (ContainerStatus status : allocateResponse.getAMResponse().getCompletedContainersStatuses()) { ids.remove(status.getContainerId()); } TimeUnit.SECONDS.sleep(1); } amrmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null); amrmClient.stop(); }
From source file:com.datatorrent.stram.InlineAM.java
License:Apache License
public boolean run() throws Exception { LOG.info("Starting Client"); // Connect to ResourceManager rmClient.start();//ww w. j av a 2s . c o m try { // Get a new application id YarnClientApplication newApp = rmClient.createApplication(); ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId(); // Create launch context for app master LOG.info("Setting up application submission context for ASM"); ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the application id appContext.setApplicationId(appId); // set the application name appContext.setApplicationName(appName); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); appContext.setAMContainerSpec(amContainer); // unmanaged AM appContext.setUnmanagedAM(true); LOG.info("Setting unmanaged AM"); // Submit the application to the applications manager LOG.info("Submitting application to ASM"); rmClient.submitApplication(appContext); // Monitor the application to wait for launch state ApplicationReport appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.ACCEPTED)); ApplicationAttemptId attemptId = appReport.getCurrentApplicationAttemptId(); LOG.info("Launching application with id: " + attemptId); // launch AM runAM(attemptId); // Monitor the application for end state appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.KILLED, YarnApplicationState.FAILED, YarnApplicationState.FINISHED)); YarnApplicationState appState = appReport.getYarnApplicationState(); FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus(); LOG.info("App ended with state: " + appReport.getYarnApplicationState() + " and status: " + appStatus); boolean success; if (YarnApplicationState.FINISHED == appState && FinalApplicationStatus.SUCCEEDED == appStatus) { LOG.info("Application has completed successfully."); success = true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + appState.toString() + ", FinalStatus=" + appStatus.toString()); success = false; } return success; } finally { rmClient.stop(); } }
From source file:com.datatorrent.stram.StramMiniClusterTest.java
License:Apache License
@Ignore @Test//from w ww . j av a 2 s.c om public void testUnmanagedAM() throws Exception { new InlineAM(conf) { @Override @SuppressWarnings("SleepWhileInLoop") public void runAM(ApplicationAttemptId attemptId) throws Exception { LOG.debug("AM running {}", attemptId); //AMRMClient amRmClient = new AMRMClientImpl(attemptId); //amRmClient.init(conf); //amRmClient.start(); YarnClientHelper yarnClient = new YarnClientHelper(conf); ApplicationMasterProtocol resourceManager = yarnClient.connectToRM(); // register with the RM (LAUNCHED -> RUNNING) RegisterApplicationMasterRequest appMasterRequest = Records .newRecord(RegisterApplicationMasterRequest.class); resourceManager.registerApplicationMaster(appMasterRequest); // AM specific logic /* int containerCount = 1; Resource capability = Records.newRecord(Resource.class); capability.setMemory(1500); Priority priority = Records.newRecord(Priority.class); priority.setPriority(10); String[] hosts = {"vm1"}; String[] racks = {"somerack"}; AMRMClient.ContainerRequest req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority, containerCount); amRmClient.addContainerRequest(req); for (int i=0; i<100; i++) { AllocateResponse ar = amRmClient.allocate(0); Thread.sleep(1000); LOG.debug("allocateResponse: {}" , ar); } */ int responseId = 0; AllocateRequest req = Records.newRecord(AllocateRequest.class); req.setResponseId(responseId++); List<ResourceRequest> lr = Lists.newArrayList(); lr.add(setupContainerAskForRM("hdev-vm", 1, 128, 10)); lr.add(setupContainerAskForRM("/default-rack", 1, 128, 10)); lr.add(setupContainerAskForRM("*", 1, 128, 10)); req.setAskList(lr); LOG.info("Requesting: " + req.getAskList()); resourceManager.allocate(req); for (int i = 0; i < 100; i++) { req = Records.newRecord(AllocateRequest.class); req.setResponseId(responseId++); AllocateResponse ar = resourceManager.allocate(req); sleep(1000); LOG.debug("allocateResponse: {}", ar); } // unregister from RM FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class); finishReq.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED); finishReq.setDiagnostics("testUnmanagedAM finished"); resourceManager.finishApplicationMaster(finishReq); } private ResourceRequest setupContainerAskForRM(String resourceName, int numContainers, int containerMemory, int priority) { ResourceRequest request = Records.newRecord(ResourceRequest.class); // setup requirements for hosts // whether a particular rack/host is needed // Refer to apis under org.apache.hadoop.net for more // details on how to get figure out rack/host mapping. // using * as any host will do for the distributed shell app request.setResourceName(resourceName); // set no. of containers needed request.setNumContainers(numContainers); // set the priority for the request Priority pri = Records.newRecord(Priority.class); pri.setPriority(priority); request.setPriority(pri); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(containerMemory); request.setCapability(capability); return request; } }.run(); }
From source file:com.datatorrent.stram.StramMiniClusterTest.java
License:Apache License
@Ignore @Test// w w w .j ava2s . co m public void testUnmanagedAM2() throws Exception { new InlineAM(conf) { @Override @SuppressWarnings("SleepWhileInLoop") public void runAM(ApplicationAttemptId attemptId) throws Exception { LOG.debug("AM running {}", attemptId); AMRMClient<ContainerRequest> amRmClient = AMRMClient.createAMRMClient(); amRmClient.init(conf); amRmClient.start(); // register with the RM (LAUNCHED -> RUNNING) amRmClient.registerApplicationMaster("", 0, null); // AM specific logic String[] hosts = { "vm1" }; String[] racks = { "somerack" }; Resource capability = Records.newRecord(Resource.class); capability.setMemory(1000); Priority priority = Records.newRecord(Priority.class); priority.setPriority(10); AMRMClient.ContainerRequest req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority); amRmClient.addContainerRequest(req); amRmClient.addContainerRequest(req); /* capability = Records.newRecord(Resource.class); capability.setMemory(5512); priority = Records.newRecord(Priority.class); priority.setPriority(11); req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority, 3); amRmClient.addContainerRequest(req); */ for (int i = 0; i < 100; i++) { AllocateResponse ar = amRmClient.allocate(0); sleep(1000); LOG.debug("allocateResponse: {}", ar); for (Container c : ar.getAllocatedContainers()) { LOG.debug("*** allocated {}", c.getResource()); amRmClient.removeContainerRequest(req); } /* GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class); ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService(); GetClusterNodesResponse response = clientRMService.getClusterNodes(request); List<NodeReport> nodeReports = response.getNodeReports(); LOG.info(nodeReports); for (NodeReport nr: nodeReports) { LOG.info("Node: " + nr.getNodeId()); LOG.info("Total memory: " + nr.getCapability()); LOG.info("Used memory: " + nr.getUsed()); LOG.info("Number containers: " + nr.getNumContainers()); } */ } // unregister from RM amRmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "testUnmanagedAM finished", null); } }.run(); }
From source file:com.datatorrent.stram.StreamingAppMasterService.java
License:Apache License
/** * Main run function for the application master * * @throws YarnException/*from w ww. j a v a 2 s . co m*/ */ @SuppressWarnings("SleepWhileInLoop") private void execute() throws YarnException, IOException { LOG.info("Starting ApplicationMaster"); final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); LOG.info("number of tokens: {}", credentials.getAllTokens().size()); Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); LOG.debug("token: {}", token); } final Configuration conf = getConfig(); long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME))); long expiryTime = System.currentTimeMillis() + tokenLifeTime; LOG.debug(" expiry token time {}", tokenLifeTime); String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE); // Register self with ResourceManager RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0, appMasterTrackingUrl); // Dump out information about cluster capability as seen by the resource manager int maxMem = response.getMaximumResourceCapability().getMemory(); int maxVcores = response.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores); // for locality relaxation fall back Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps .newHashMap(); // Setup heartbeat emitter // TODO poll RM every now and then with an empty request to let RM know that we are alive // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting: // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter // is not required. int loopCounter = -1; List<ContainerId> releasedContainers = new ArrayList<ContainerId>(); int numTotalContainers = 0; // keep track of already requested containers to not request them again while waiting for allocation int numRequestedContainers = 0; int numReleasedContainers = 0; int nextRequestPriority = 0; ResourceRequestHandler resourceRequestor = new ResourceRequestHandler(); YarnClient clientRMService = YarnClient.createYarnClient(); try { // YARN-435 // we need getClusterNodes to populate the initial node list, // subsequent updates come through the heartbeat response clientRMService.init(conf); clientRMService.start(); ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService, dag.getAttributes().get(DAG.APPLICATION_NAME), UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID)); if (ar != null) { appDone = true; dnmgr.shutdownDiagnosticsMessage = String.format( "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.", ar.getApplicationId().toString(), ar.getName(), ar.getUser()); LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finishApplication(FinalApplicationStatus.FAILED, numTotalContainers); return; } resourceRequestor.updateNodeReports(clientRMService.getNodeReports()); } catch (Exception e) { throw new RuntimeException("Failed to retrieve cluster nodes report.", e); } finally { clientRMService.stop(); } // check for previously allocated containers // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490 checkContainerStatus(); FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); while (!appDone) { loopCounter++; if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime && hdfsKeyTabFile != null) { String applicationId = appAttemptID.getApplicationId().toString(); expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp", applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true); } Runnable r; while ((r = this.pendingTasks.poll()) != null) { r.run(); } // log current state /* * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers + * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size()); */ // Sleep before each loop when asking RM for containers // to avoid flooding RM with spurious requests when it // need not have any available containers try { sleep(1000); } catch (InterruptedException e) { LOG.info("Sleep interrupted " + e.getMessage()); } // Setup request to be sent to RM to allocate containers List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>(); List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>(); // request containers for pending deploy requests if (!dnmgr.containerStartRequests.isEmpty()) { StreamingContainerAgent.ContainerStartRequest csr; while ((csr = dnmgr.containerStartRequests.poll()) != null) { if (csr.container.getRequiredMemoryMB() > maxMem) { LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.", csr.container.getRequiredMemoryMB(), maxMem); csr.container.setRequiredMemoryMB(maxMem); } if (csr.container.getRequiredVCores() > maxVcores) { LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.", csr.container.getRequiredVCores(), maxVcores); csr.container.setRequiredVCores(maxVcores); } csr.container.setResourceRequestPriority(nextRequestPriority++); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true); MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>( loopCounter, cr); requestedResources.put(csr, pair); containerRequests.add(cr); } } if (!requestedResources.isEmpty()) { //resourceRequestor.clearNodeMapping(); for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) { StreamingContainerAgent.ContainerStartRequest csr = entry.getKey(); removedContainerRequests.add(entry.getValue().getRight()); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false); entry.getValue().setLeft(loopCounter); entry.getValue().setRight(cr); containerRequests.add(cr); } } } numTotalContainers += containerRequests.size(); numRequestedContainers += containerRequests.size(); AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests, releasedContainers); if (amResp.getAMCommand() != null) { LOG.info(" statement executed:{}", amResp.getAMCommand()); switch (amResp.getAMCommand()) { case AM_RESYNC: case AM_SHUTDOWN: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); default: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); } } releasedContainers.clear(); // Retrieve list of allocated containers from the response List<Container> newAllocatedContainers = amResp.getAllocatedContainers(); // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size()); numRequestedContainers -= newAllocatedContainers.size(); long timestamp = System.currentTimeMillis(); for (Container allocatedContainer : newAllocatedContainers) { LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode=" + allocatedContainer.getNodeId() + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory" + allocatedContainer.getResource().getMemory() + ", priority" + allocatedContainer.getPriority()); // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString()); boolean alreadyAllocated = true; StreamingContainerAgent.ContainerStartRequest csr = null; for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority() .getPriority()) { alreadyAllocated = false; csr = entry.getKey(); break; } } if (alreadyAllocated) { LOG.info("Releasing {} as resource with priority {} was already assigned", allocatedContainer.getId(), allocatedContainer.getPriority()); releasedContainers.add(allocatedContainer.getId()); numReleasedContainers++; numRequestedContainers++; continue; } if (csr != null) { requestedResources.remove(csr); } // allocate resource to container ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(), allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(), allocatedContainer.getResource().getMemory(), allocatedContainer.getResource().getVirtualCores(), allocatedContainer.getNodeHttpAddress()); StreamingContainerAgent sca = dnmgr.assignContainer(resource, null); if (sca == null) { // allocated container no longer needed, add release request LOG.warn("Container {} allocated but nothing to deploy, going to release this container.", allocatedContainer.getId()); releasedContainers.add(allocatedContainer.getId()); } else { AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer); this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder); ByteBuffer tokens = null; if (UserGroupInformation.isSecurityEnabled()) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken( ugi.getUserName(), heartbeatListener.getAddress()); allocatedContainerHolder.delegationToken = delegationToken; //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress()); tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken); } LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer, nmClient, sca, tokens); // Thread launchThread = new Thread(runnableLaunchContainer); // launchThreads.add(launchThread); // launchThread.start(); launchContainer.run(); // communication with NMs is now async // record container start event StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString()); ev.setTimestamp(timestamp); dnmgr.recordEventAsync(ev); } } // track node updates for future locality constraint allocations // TODO: it seems 2.0.4-alpha doesn't give us any updates resourceRequestor.updateNodeReports(amResp.getUpdatedNodes()); // Check the completed containers List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses(); // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); AllocatedContainer allocatedContainer = allocatedContainers .remove(containerStatus.getContainerId().toString()); if (allocatedContainer != null && allocatedContainer.delegationToken != null) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName()); } int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { if (allocatedContainer != null) { numFailedContainers.incrementAndGet(); } // if (exitStatus == 1) { // // non-recoverable StreamingContainer failure // appDone = true; // finalStatus = FinalApplicationStatus.FAILED; // dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId(); // LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage); // } // else { // Recoverable failure or process killed (externally or via stop request by AM) // also occurs when a container was released by the application but never assigned/launched LOG.debug("Container {} failed or killed.", containerStatus.getContainerId()); dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString()); // } } else { // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } String containerIdStr = containerStatus.getContainerId().toString(); dnmgr.removeContainerAgent(containerIdStr); // record container stop event StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus()); ev.setReason(containerStatus.getDiagnostics()); dnmgr.recordEventAsync(ev); } if (dnmgr.forcedShutdown) { LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finalStatus = FinalApplicationStatus.FAILED; appDone = true; } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0 && dnmgr.containerStartRequests.isEmpty()) { LOG.debug("Exiting as no more containers are allocated or requested"); finalStatus = FinalApplicationStatus.SUCCEEDED; appDone = true; } LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + numTotalContainers + ", requested=" + numRequestedContainers + ", released=" + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed=" + numFailedContainers + ", currentAllocated=" + allocatedContainers.size()); // monitor child containers dnmgr.monitorHeartbeat(); } finishApplication(finalStatus, numTotalContainers); }
From source file:com.datatorrent.stram.StreamingAppMasterService.java
License:Apache License
private void finishApplication(FinalApplicationStatus finalStatus, int numTotalContainers) throws YarnException, IOException { LOG.info("Application completed. Signalling finish to RM"); FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class); finishReq.setFinalApplicationStatus(finalStatus); if (finalStatus != FinalApplicationStatus.SUCCEEDED) { String diagnostics = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + allocatedContainers.size() + ", failed=" + numFailedContainers.get(); if (!StringUtils.isEmpty(dnmgr.shutdownDiagnosticsMessage)) { diagnostics += "\n"; diagnostics += dnmgr.shutdownDiagnosticsMessage; }// w w w . j av a 2 s .co m // YARN-208 - as of 2.0.1-alpha dropped by the RM finishReq.setDiagnostics(diagnostics); // expected termination of the master process // application status and diagnostics message are set above } LOG.info("diagnostics: " + finishReq.getDiagnostics()); amRmClient.unregisterApplicationMaster(finishReq.getFinalApplicationStatus(), finishReq.getDiagnostics(), null); }
From source file:com.epam.hadoop.nv.yarn.Client.java
License:Apache License
/** * Monitor the submitted application for completion. Kill application if * time expires.// ww w. j a v a 2 s . c om * * @param appId * Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { // Check app status every 1 second. try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } // Get application report for the appId we are interested in ApplicationReport report = yarnClient.getApplicationReport(appId); LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:com.flyhz.avengers.framework.AvengersClient.java
License:Apache License
/** * Monitor the submitted application for completion. Kill application if * time expires./*from ww w . ja v a2s. c o m*/ * * @param appId * Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { while (true) { // Check app status every 10 second. try { Thread.sleep(10000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } // Get application report for the appId we are interested in ApplicationReport report = yarnClient.getApplicationReport(appId); LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser()); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }
From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java
License:Apache License
@VisibleForTesting protected boolean finish() { // wait for completion. while (!done && (numCompletedContainers.get() != numTotalContainers)) { try {//www. ja v a2s .c o m Thread.sleep(200); } catch (InterruptedException ex) { } } // Join all launched threads // needed for when we time out // and we need to release containers for (Thread launchThread : launchThreads) { try { launchThread.join(10000); } catch (InterruptedException e) { LOG.info("Exception thrown in thread join: " + e.getMessage()); e.printStackTrace(); } } // When the application completes, it should stop all running containers LOG.info("Application completed. Stopping running containers"); nmClientAsync.stop(); // When the application completes, it should send a finish application // signal to the RM LOG.info("Application completed. Signalling finish to RM"); FinalApplicationStatus appStatus; String appMessage = null; boolean success = true; if (numCompletedContainers.get() - numFailedContainers.get() >= numTotalContainers) { appStatus = FinalApplicationStatus.SUCCEEDED; } else { appStatus = FinalApplicationStatus.FAILED; appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed=" + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get(); LOG.info(appMessage); success = false; } try { amRMClient.unregisterApplicationMaster(appStatus, appMessage, null); } catch (YarnException ex) { LOG.error("Failed to unregister application", ex); } catch (IOException e) { LOG.error("Failed to unregister application", e); } amRMClient.stop(); return success; }