List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED
FinalApplicationStatus FAILED
To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED, click the Source Link.
From source file:org.apache.slider.server.appmaster.state.AppState.java
License:Apache License
/** * Check the failure threshold for a role * @param role role to examine/*from w w w . ja v a 2s. c o m*/ * @throws TriggerClusterTeardownException if the role * has failed too many times */ private void checkFailureThreshold(RoleStatus role) throws TriggerClusterTeardownException { int failures = role.getFailed(); int threshold = getFailureThresholdForRole(role); log.debug("Failure count of component: {}: {}, threshold={}", role.getName(), failures, threshold); if (failures > threshold) { throw new TriggerClusterTeardownException(SliderExitCodes.EXIT_DEPLOYMENT_FAILED, FinalApplicationStatus.FAILED, ErrorStrings.E_UNSTABLE_CLUSTER + " - failed with component %s failing %d times (%d in startup);" + " threshold is %d - last failure: %s", role.getName(), role.getFailed(), role.getStartFailed(), threshold, role.getFailureMessage()); } }
From source file:org.apache.slider.server.appmaster.state.AppState.java
License:Apache License
/** * Look at the allocation status of one role, and trigger add/release * actions if the number of desired role instances doesnt equal * (actual+pending)//from w w w. j a v a 2 s .c o m * @param role role * @return a list of operations * @throws SliderInternalStateException if the operation reveals that * the internal state of the application is inconsistent. */ @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") private List<AbstractRMOperation> reviewOneRole(RoleStatus role) throws SliderInternalStateException, TriggerClusterTeardownException { List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>(); int delta; String details; int expected; String name = role.getName(); synchronized (role) { delta = role.getDelta(); expected = role.getDesired(); } log.info("Reviewing {} : expected {}", role, expected); checkFailureThreshold(role); if (expected < 0) { // negative value: fail throw new TriggerClusterTeardownException(SliderExitCodes.EXIT_DEPLOYMENT_FAILED, FinalApplicationStatus.FAILED, "Negative component count of %d desired for component %s", expected, role); } if (delta > 0) { log.info("{}: Asking for {} more nodes(s) for a total of {} ", name, delta, expected); //more workers needed than we have -ask for more for (int i = 0; i < delta; i++) { Resource capability = recordFactory.newResource(); AMRMClient.ContainerRequest containerAsk = buildContainerResourceAndRequest(role, capability); log.info("Container ask is {} and label = {}", containerAsk, containerAsk.getNodeLabelExpression()); int askMemory = containerAsk.getCapability().getMemory(); if (askMemory > this.containerMaxMemory) { log.warn("Memory requested: {} > max of {}", askMemory, containerMaxMemory); } operations.add(new ContainerRequestOperation(containerAsk)); } } else if (delta < 0) { log.info("{}: Asking for {} fewer node(s) for a total of {}", name, -delta, expected); //reduce the number expected (i.e. 
subtract the delta) //then pick some containers to kill int excess = -delta; // how many requests are outstanding int outstandingRequests = role.getRequested(); if (outstandingRequests > 0) { // outstanding requests. int toCancel = Math.min(outstandingRequests, excess); Priority p1 = ContainerPriority.createPriority(role.getPriority(), true); Priority p2 = ContainerPriority.createPriority(role.getPriority(), false); operations.add(new CancelRequestOperation(p1, p2, toCancel)); role.cancel(toCancel); excess -= toCancel; assert excess >= 0 : "Attempted to cancel too many requests"; log.info("Submitted {} cancellations, leaving {} to release", toCancel, excess); if (excess == 0) { log.info("After cancelling requests, application is at desired size"); } } // after the cancellation there may be no excess if (excess > 0) { // get the nodes to release int roleId = role.getKey(); // enum all active nodes that aren't being released List<RoleInstance> containersToRelease = enumNodesWithRoleId(roleId, true); if (containersToRelease.isEmpty()) { log.info("No containers for component {}", roleId); } // cut all release-in-progress nodes ListIterator<RoleInstance> li = containersToRelease.listIterator(); while (li.hasNext()) { RoleInstance next = li.next(); if (next.released) { li.remove(); } } // warn if the desired state can't be reaced int numberAvailableForRelease = containersToRelease.size(); if (numberAvailableForRelease < excess) { log.warn("Not enough containers to release, have {} and need {} more", numberAvailableForRelease, excess - numberAvailableForRelease); } // ask the release selector to sort the targets containersToRelease = containerReleaseSelector.sortCandidates(roleId, containersToRelease, excess); //crop to the excess List<RoleInstance> finalCandidates = (excess < numberAvailableForRelease) ? 
containersToRelease.subList(0, excess) : containersToRelease; // then build up a release operation, logging each container as released for (RoleInstance possible : finalCandidates) { log.debug("Targeting for release: {}", possible); containerReleaseSubmitted(possible.container); operations.add(new ContainerReleaseOperation(possible.getId())); } } } return operations; }
From source file:org.apache.sysml.yarn.DMLAppMaster.java
License:Apache License
public void runApplicationMaster(String[] args) throws YarnException, IOException { _conf = new YarnConfiguration(); //obtain application ID String containerIdString = System.getenv(Environment.CONTAINER_ID.name()); ContainerId containerId = ConverterUtils.toContainerId(containerIdString); _appId = containerId.getApplicationAttemptId().getApplicationId(); LOG.info("SystemML appplication master (applicationID: " + _appId + ")"); //initialize clients to ResourceManager AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(_conf);//w w w. jav a 2s . c o m rmClient.start(); //register with ResourceManager rmClient.registerApplicationMaster("", 0, ""); //host, port for rm communication LOG.debug("Registered the SystemML application master with resource manager"); //start status reporter to ResourceManager DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000); reporter.start(); LOG.debug("Started status reporter (heartbeat to resource manager)"); //set DMLscript app master context DMLScript.setActiveAM(); //parse input arguments String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs(); //run SystemML CP FinalApplicationStatus status = null; try { //core dml script execution (equivalent to non-AM runtime) boolean success = DMLScript.executeScript(_conf, otherArgs); if (success) status = FinalApplicationStatus.SUCCEEDED; else status = FinalApplicationStatus.FAILED; } catch (DMLScriptException ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage()); status = FinalApplicationStatus.FAILED; writeMessageToHDFSWorkingDir(ex.getMessage()); } catch (Exception ex) { LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex); status = FinalApplicationStatus.FAILED; } finally { //stop periodic status reports reporter.stopStatusReporter(); LOG.debug("Stopped status reporter"); //unregister resource manager client 
rmClient.unregisterApplicationMaster(status, "", ""); LOG.debug("Unregistered the SystemML application master"); } }
From source file:org.apache.tajo.master.rm.RMContainerAllocator.java
License:Apache License
/**
 * Marks the allocator as stopped, stops the parent service and unregisters
 * the application master with a final status derived from the query state.
 */
public void stop() {
  stopped.set(true);
  super.stop();

  final QueryState queryState = context.getQuery().getState();
  final FinalApplicationStatus finishState;
  if (queryState == QueryState.QUERY_SUCCEEDED) {
    finishState = FinalApplicationStatus.SUCCEEDED;
  } else if (queryState == QueryState.QUERY_KILLED || queryState == QueryState.QUERY_RUNNING) {
    // a query that is still running at stop time is reported as killed
    finishState = FinalApplicationStatus.KILLED;
  } else if (queryState == QueryState.QUERY_FAILED || queryState == QueryState.QUERY_ERROR) {
    finishState = FinalApplicationStatus.FAILED;
  } else {
    finishState = FinalApplicationStatus.UNDEFINED;
  }

  try {
    unregisterApplicationMaster(finishState, "", "http://localhost:1234");
  } catch (YarnRemoteException e) {
    LOG.error(e);
  }
}
From source file:org.apache.tajo.master.rm.YarnRMContainerAllocator.java
License:Apache License
public void stop() { if (stopped.get()) { return;//from w w w . ja v a 2 s .co m } LOG.info("un-registering ApplicationMaster(QueryMaster):" + appAttemptId); stopped.set(true); try { FinalApplicationStatus status = FinalApplicationStatus.UNDEFINED; Query query = context.getQuery(); if (query != null) { TajoProtos.QueryState state = query.getState(); if (state == TajoProtos.QueryState.QUERY_SUCCEEDED) { status = FinalApplicationStatus.SUCCEEDED; } else if (state == TajoProtos.QueryState.QUERY_FAILED || state == TajoProtos.QueryState.QUERY_ERROR) { status = FinalApplicationStatus.FAILED; } else if (state == TajoProtos.QueryState.QUERY_ERROR) { status = FinalApplicationStatus.FAILED; } } unregisterApplicationMaster(status, "tajo query finished", null); } catch (Exception e) { LOG.error(e.getMessage(), e); } allocatorThread.interrupt(); LOG.info("un-registered ApplicationMAster(QueryMaster) stopped:" + appAttemptId); super.stop(); }
From source file:org.apache.tajo.master.rm.YarnTajoResourceManager.java
License:Apache License
@Override public void stopQueryMaster(QueryId queryId) { try {//from ww w .ja va 2 s . c o m FinalApplicationStatus appStatus = FinalApplicationStatus.UNDEFINED; QueryInProgress queryInProgress = masterContext.getQueryJobManager().getQueryInProgress(queryId); if (queryInProgress == null) { return; } TajoProtos.QueryState state = queryInProgress.getQueryInfo().getQueryState(); if (state == TajoProtos.QueryState.QUERY_SUCCEEDED) { appStatus = FinalApplicationStatus.SUCCEEDED; } else if (state == TajoProtos.QueryState.QUERY_FAILED || state == TajoProtos.QueryState.QUERY_ERROR) { appStatus = FinalApplicationStatus.FAILED; } else if (state == TajoProtos.QueryState.QUERY_ERROR) { appStatus = FinalApplicationStatus.FAILED; } FinishApplicationMasterRequest request = recordFactory .newRecordInstance(FinishApplicationMasterRequest.class); request.setFinalApplicationStatus(appStatus); request.setDiagnostics("QueryMaster shutdown by TajoMaster."); rmClient.finishApplicationMaster(request); } catch (Exception e) { LOG.error(e.getMessage(), e); } }
From source file:org.apache.tez.client.LocalClient.java
License:Apache License
/**
 * Maps a DAG application master state onto the YARN final application status.
 *
 * @param dagAppMasterState state of the DAG application master
 * @return SUCCEEDED or KILLED for the matching states, FAILED for FAILED or
 *         ERROR, and UNDEFINED for every other state
 */
protected FinalApplicationStatus convertDAGAppMasterStateToFinalYARNState(DAGAppMasterState dagAppMasterState) {
  final FinalApplicationStatus yarnStatus;
  switch (dagAppMasterState) {
    case SUCCEEDED:
      yarnStatus = FinalApplicationStatus.SUCCEEDED;
      break;
    case KILLED:
      yarnStatus = FinalApplicationStatus.KILLED;
      break;
    case FAILED:
    case ERROR:
      yarnStatus = FinalApplicationStatus.FAILED;
      break;
    case NEW:
    case INITED:
    case RECOVERING:
    case IDLE:
    case RUNNING:
    default:
      // no final outcome for these states
      yarnStatus = FinalApplicationStatus.UNDEFINED;
      break;
  }
  return yarnStatus;
}
From source file:org.apache.tez.dag.app.rm.TaskSchedulerEventHandler.java
License:Apache License
@Override public AppFinalStatus getFinalAppStatus() { FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; StringBuffer sb = new StringBuffer(); if (dagAppMaster == null) { finishState = FinalApplicationStatus.UNDEFINED; sb.append("App not yet initialized"); } else {/* www.ja v a 2 s . c om*/ DAGAppMasterState appMasterState = dagAppMaster.getState(); if (appMasterState == DAGAppMasterState.SUCCEEDED) { finishState = FinalApplicationStatus.SUCCEEDED; } else if (appMasterState == DAGAppMasterState.KILLED || (appMasterState == DAGAppMasterState.RUNNING && isSignalled)) { finishState = FinalApplicationStatus.KILLED; } else if (appMasterState == DAGAppMasterState.FAILED || appMasterState == DAGAppMasterState.ERROR) { finishState = FinalApplicationStatus.FAILED; } else { finishState = FinalApplicationStatus.UNDEFINED; } List<String> diagnostics = dagAppMaster.getDiagnostics(); if (diagnostics != null) { for (String s : diagnostics) { sb.append(s).append("\n"); } } } if (LOG.isDebugEnabled()) { LOG.debug("Setting job diagnostics to " + sb.toString()); } // if history url is set use the same, if historyUrl is set to "" then rm ui disables the // history url return new AppFinalStatus(finishState, sb.toString(), historyUrl); }
From source file:org.apache.tez.dag.app.rm.TaskSchedulerManager.java
License:Apache License
public AppFinalStatus getFinalAppStatus() { FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; StringBuffer sb = new StringBuffer(); if (dagAppMaster == null) { finishState = FinalApplicationStatus.UNDEFINED; sb.append("App not yet initialized"); } else {/*from w w w. j av a 2s. c om*/ DAGAppMasterState appMasterState = dagAppMaster.getState(); if (appMasterState == DAGAppMasterState.SUCCEEDED) { finishState = FinalApplicationStatus.SUCCEEDED; } else if (appMasterState == DAGAppMasterState.KILLED || (appMasterState == DAGAppMasterState.RUNNING && isSignalled)) { finishState = FinalApplicationStatus.KILLED; } else if (appMasterState == DAGAppMasterState.FAILED || appMasterState == DAGAppMasterState.ERROR) { finishState = FinalApplicationStatus.FAILED; } else { finishState = FinalApplicationStatus.UNDEFINED; } List<String> diagnostics = dagAppMaster.getDiagnostics(); if (diagnostics != null) { for (String s : diagnostics) { sb.append(s).append("\n"); } } } if (LOG.isDebugEnabled()) { LOG.debug("Setting job diagnostics to " + sb.toString()); } // if history url is set use the same, if historyUrl is set to "" then rm ui disables the // history url return new AppFinalStatus(finishState, sb.toString(), historyUrl); }
From source file:org.apache.tez.hadoop.shim.TestHadoopShim28.java
License:Apache License
@Test public void testApplyFinalApplicationStatusCorrection() { HadoopShim shim = new HadoopShim28(); // Session mode success/failure, change to ended Assert.assertEquals(FinalApplicationStatus.ENDED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.SUCCEEDED, true, false)); Assert.assertEquals(FinalApplicationStatus.ENDED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.FAILED, true, false)); // Non-session mode success/failure, retain success/failure Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.SUCCEEDED, false, false)); Assert.assertEquals(FinalApplicationStatus.FAILED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.FAILED, false, false)); // Session and non-session mode error, retain failed. Assert.assertEquals(FinalApplicationStatus.FAILED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.FAILED, true, true)); Assert.assertEquals(FinalApplicationStatus.FAILED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.FAILED, false, true)); // Session and non-session mode killed is killed. Assert.assertEquals(FinalApplicationStatus.KILLED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.KILLED, true, false)); Assert.assertEquals(FinalApplicationStatus.KILLED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.KILLED, false, false)); // Session and non-session mode undefined is undefined. Assert.assertEquals(FinalApplicationStatus.UNDEFINED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.UNDEFINED, true, false)); Assert.assertEquals(FinalApplicationStatus.UNDEFINED, shim.applyFinalApplicationStatusCorrection(FinalApplicationStatus.UNDEFINED, false, false)); }