List of usage examples for org.apache.hadoop.yarn.api.protocolrecords AllocateResponse getCompletedContainersStatuses
@Public @Stable public abstract List<ContainerStatus> getCompletedContainersStatuses();
From source file:com.continuuity.weave.internal.yarn.Hadoop21YarnAMClient.java
License:Apache License
@Override public synchronized void allocate(float progress, AllocateHandler handler) throws Exception { AllocateResponse allocateResponse = amrmClient.allocate(progress); List<ProcessLauncher<YarnContainerInfo>> launchers = Lists .newArrayListWithCapacity(allocateResponse.getAllocatedContainers().size()); for (Container container : allocateResponse.getAllocatedContainers()) { launchers.add(new RunnableProcessLauncher(new Hadoop21YarnContainerInfo(container), nmClient)); }/* w ww . ja v a 2 s.c o m*/ if (!launchers.isEmpty()) { handler.acquired(launchers); // If no process has been launched through the given launcher, return the container. for (ProcessLauncher<YarnContainerInfo> l : launchers) { // This cast always works. RunnableProcessLauncher launcher = (RunnableProcessLauncher) l; if (!launcher.isLaunched()) { Container container = launcher.getContainerInfo().getContainer(); LOG.info("Nothing to run in container, releasing it: {}", container); amrmClient.releaseAssignedContainer(container.getId()); } } } List<YarnContainerStatus> completed = ImmutableList .copyOf(Iterables.transform(allocateResponse.getCompletedContainersStatuses(), STATUS_TRANSFORM)); if (!completed.isEmpty()) { handler.completed(completed); } }
From source file:com.datatorrent.stram.StreamingAppMasterService.java
License:Apache License
/** * Main run function for the application master * * @throws YarnException// w w w .java2 s. c o m */ @SuppressWarnings("SleepWhileInLoop") private void execute() throws YarnException, IOException { LOG.info("Starting ApplicationMaster"); final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); LOG.info("number of tokens: {}", credentials.getAllTokens().size()); Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); LOG.debug("token: {}", token); } final Configuration conf = getConfig(); long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME))); long expiryTime = System.currentTimeMillis() + tokenLifeTime; LOG.debug(" expiry token time {}", tokenLifeTime); String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE); // Register self with ResourceManager RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0, appMasterTrackingUrl); // Dump out information about cluster capability as seen by the resource manager int maxMem = response.getMaximumResourceCapability().getMemory(); int maxVcores = response.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores); // for locality relaxation fall back Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps .newHashMap(); // Setup heartbeat emitter // TODO poll RM every now and then with an empty request to let RM know that we are alive // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting: // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter // is not required. int loopCounter = -1; List<ContainerId> releasedContainers = new ArrayList<ContainerId>(); int numTotalContainers = 0; // keep track of already requested containers to not request them again while waiting for allocation int numRequestedContainers = 0; int numReleasedContainers = 0; int nextRequestPriority = 0; ResourceRequestHandler resourceRequestor = new ResourceRequestHandler(); YarnClient clientRMService = YarnClient.createYarnClient(); try { // YARN-435 // we need getClusterNodes to populate the initial node list, // subsequent updates come through the heartbeat response clientRMService.init(conf); clientRMService.start(); ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService, dag.getAttributes().get(DAG.APPLICATION_NAME), UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID)); if (ar != null) { appDone = true; dnmgr.shutdownDiagnosticsMessage = String.format( "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.", ar.getApplicationId().toString(), ar.getName(), ar.getUser()); LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finishApplication(FinalApplicationStatus.FAILED, numTotalContainers); return; } resourceRequestor.updateNodeReports(clientRMService.getNodeReports()); } catch (Exception e) { throw new RuntimeException("Failed to retrieve cluster nodes report.", e); } finally { clientRMService.stop(); } // check for previously allocated containers // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490 checkContainerStatus(); FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); while (!appDone) { loopCounter++; if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime && hdfsKeyTabFile != null) { String applicationId = appAttemptID.getApplicationId().toString(); expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp", applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true); } Runnable r; while ((r = this.pendingTasks.poll()) != null) { r.run(); } // log current state /* * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers + * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size()); */ // Sleep before each loop when asking RM for containers // to avoid flooding RM with spurious requests when it // need not have any available containers try { sleep(1000); } catch (InterruptedException e) { LOG.info("Sleep interrupted " + e.getMessage()); } // Setup request to be sent to RM to allocate containers List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>(); List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>(); // request containers for pending deploy requests if (!dnmgr.containerStartRequests.isEmpty()) { StreamingContainerAgent.ContainerStartRequest csr; while ((csr = dnmgr.containerStartRequests.poll()) != null) { if (csr.container.getRequiredMemoryMB() > maxMem) { LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.", csr.container.getRequiredMemoryMB(), maxMem); csr.container.setRequiredMemoryMB(maxMem); } if (csr.container.getRequiredVCores() > maxVcores) { LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.", csr.container.getRequiredVCores(), maxVcores); csr.container.setRequiredVCores(maxVcores); } csr.container.setResourceRequestPriority(nextRequestPriority++); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true); MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>( loopCounter, cr); requestedResources.put(csr, pair); containerRequests.add(cr); } } if (!requestedResources.isEmpty()) { //resourceRequestor.clearNodeMapping(); for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) { StreamingContainerAgent.ContainerStartRequest csr = entry.getKey(); removedContainerRequests.add(entry.getValue().getRight()); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false); entry.getValue().setLeft(loopCounter); entry.getValue().setRight(cr); containerRequests.add(cr); } } } numTotalContainers += containerRequests.size(); numRequestedContainers += containerRequests.size(); AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests, releasedContainers); if (amResp.getAMCommand() != null) { LOG.info(" statement executed:{}", amResp.getAMCommand()); switch (amResp.getAMCommand()) { case AM_RESYNC: case AM_SHUTDOWN: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); default: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); } } releasedContainers.clear(); // Retrieve list of allocated containers from the response List<Container> newAllocatedContainers = amResp.getAllocatedContainers(); // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size()); numRequestedContainers -= newAllocatedContainers.size(); long timestamp = System.currentTimeMillis(); for (Container allocatedContainer : newAllocatedContainers) { LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode=" + allocatedContainer.getNodeId() + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory" + allocatedContainer.getResource().getMemory() + ", priority" + allocatedContainer.getPriority()); // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString()); boolean alreadyAllocated = true; StreamingContainerAgent.ContainerStartRequest csr = null; for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority() .getPriority()) { alreadyAllocated = false; csr = entry.getKey(); break; } } if (alreadyAllocated) { LOG.info("Releasing {} as resource with priority {} was already assigned", allocatedContainer.getId(), allocatedContainer.getPriority()); releasedContainers.add(allocatedContainer.getId()); numReleasedContainers++; numRequestedContainers++; continue; } if (csr != null) { requestedResources.remove(csr); } // allocate resource to container ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(), allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(), allocatedContainer.getResource().getMemory(), allocatedContainer.getResource().getVirtualCores(), allocatedContainer.getNodeHttpAddress()); StreamingContainerAgent sca = dnmgr.assignContainer(resource, null); if (sca == null) { // allocated container no longer needed, add release request LOG.warn("Container {} allocated but nothing to deploy, going to release this container.", allocatedContainer.getId()); releasedContainers.add(allocatedContainer.getId()); } else { AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer); this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder); ByteBuffer tokens = null; if (UserGroupInformation.isSecurityEnabled()) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken( ugi.getUserName(), heartbeatListener.getAddress()); allocatedContainerHolder.delegationToken = delegationToken; //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress()); tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken); } LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer, nmClient, sca, tokens); // Thread launchThread = new Thread(runnableLaunchContainer); // launchThreads.add(launchThread); // launchThread.start(); launchContainer.run(); // communication with NMs is now async // record container start event StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString()); ev.setTimestamp(timestamp); dnmgr.recordEventAsync(ev); } } // track node updates for future locality constraint allocations // TODO: it seems 2.0.4-alpha doesn't give us any updates resourceRequestor.updateNodeReports(amResp.getUpdatedNodes()); // Check the completed containers List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses(); // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); AllocatedContainer allocatedContainer = allocatedContainers .remove(containerStatus.getContainerId().toString()); if (allocatedContainer != null && allocatedContainer.delegationToken != null) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName()); } int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { if (allocatedContainer != null) { numFailedContainers.incrementAndGet(); } // if (exitStatus == 1) { // // non-recoverable StreamingContainer failure // appDone = true; // finalStatus = FinalApplicationStatus.FAILED; // dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId(); // LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage); // } // else { // Recoverable failure or process killed (externally or via stop request by AM) // also occurs when a container was released by the application but never assigned/launched LOG.debug("Container {} failed or killed.", containerStatus.getContainerId()); dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString()); // } } else { // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } String containerIdStr = containerStatus.getContainerId().toString(); dnmgr.removeContainerAgent(containerIdStr); // record container stop event StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus()); ev.setReason(containerStatus.getDiagnostics()); dnmgr.recordEventAsync(ev); } if (dnmgr.forcedShutdown) { LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finalStatus = FinalApplicationStatus.FAILED; appDone = true; } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0 && dnmgr.containerStartRequests.isEmpty()) { LOG.debug("Exiting as no more containers are allocated or requested"); finalStatus = FinalApplicationStatus.SUCCEEDED; appDone = true; } LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + numTotalContainers + ", requested=" + numRequestedContainers + ", released=" + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed=" + numFailedContainers + ", currentAllocated=" + allocatedContainers.size()); // monitor child containers dnmgr.monitorHeartbeat(); } finishApplication(finalStatus, numTotalContainers); }
From source file:com.yahoo.storm.yarn.MasterServer.java
License:Open Source License
private Thread initAndStartHeartbeat(final StormAMRMClient client, final BlockingQueue<Container> launcherQueue, final int heartBeatIntervalMs) { Thread thread = new Thread() { @Override//w ww. java 2s . co m public void run() { try { while (client.getServiceState() == Service.STATE.STARTED && !Thread.currentThread().isInterrupted()) { Thread.sleep(heartBeatIntervalMs); // We always send 50% progress. AllocateResponse allocResponse = client.allocate(0.5f); AMCommand am_command = allocResponse.getAMCommand(); if (am_command != null && (am_command == AMCommand.AM_SHUTDOWN || am_command == AMCommand.AM_RESYNC)) { LOG.info("Got AM_SHUTDOWN or AM_RESYNC from the RM"); _handler.stop(); System.exit(0); } List<Container> allocatedContainers = allocResponse.getAllocatedContainers(); if (allocatedContainers.size() > 0) { // Add newly allocated containers to the client. LOG.info("HB: Received allocated containers (" + allocatedContainers.size() + ")"); client.addAllocatedContainers(allocatedContainers); if (client.supervisorsAreToRun()) { LOG.info("HB: Supervisors are to run, so queueing (" + allocatedContainers.size() + ") containers..."); launcherQueue.addAll(allocatedContainers); } else { LOG.info("HB: Supervisors are to stop, so releasing all containers..."); client.stopAllSupervisors(); } } List<ContainerStatus> completedContainers = allocResponse.getCompletedContainersStatuses(); if (completedContainers.size() > 0 && client.supervisorsAreToRun()) { LOG.debug("HB: Containers completed (" + completedContainers.size() + "), so releasing them."); client.startAllSupervisors(); } } } catch (Throwable t) { // Something happened we could not handle. Make sure the AM goes // down so that we are not surprised later on that our heart // stopped.. LOG.error("Unhandled error in AM: ", t); _handler.stop(); System.exit(1); } } }; thread.start(); return thread; }
From source file:disAMS.AMRMClient.Impl.AMRMClientImpl.java
License:Apache License
@Override public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException { Preconditions.checkArgument(progressIndicator >= 0, "Progress indicator should not be negative"); AllocateResponse allocateResponse = null; List<ResourceRequest> askList = null; List<ContainerId> releaseList = null; AllocateRequest allocateRequest = null; List<String> blacklistToAdd = new ArrayList<String>(); List<String> blacklistToRemove = new ArrayList<String>(); try {/*ww w .j a v a2 s . co m*/ synchronized (this) { askList = new ArrayList<ResourceRequest>(ask.size()); for (ResourceRequest r : ask) { // create a copy of ResourceRequest as we might change it while the // RPC layer is using it to send info across askList.add(ResourceRequest.newInstance(r.getPriority(), r.getResourceName(), r.getCapability(), r.getNumContainers(), r.getRelaxLocality(), r.getNodeLabelExpression())); } releaseList = new ArrayList<ContainerId>(release); // optimistically clear this collection assuming no RPC failure ask.clear(); release.clear(); blacklistToAdd.addAll(blacklistAdditions); blacklistToRemove.addAll(blacklistRemovals); ResourceBlacklistRequest blacklistRequest = (blacklistToAdd != null) || (blacklistToRemove != null) ? ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove) : null; allocateRequest = AllocateRequest.newInstance(lastResponseId, progressIndicator, askList, releaseList, blacklistRequest); // clear blacklistAdditions and blacklistRemovals before // unsynchronized part blacklistAdditions.clear(); blacklistRemovals.clear(); } try { allocateResponse = rmClient.allocate(allocateRequest); } catch (ApplicationMasterNotRegisteredException e) { LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing."); synchronized (this) { release.addAll(this.pendingRelease); blacklistAdditions.addAll(this.blacklistedNodes); for (Map<String, TreeMap<Resource, ResourceRequestInfo>> rr : remoteRequestsTable.values()) { for (Map<Resource, ResourceRequestInfo> capabalities : rr.values()) { for (ResourceRequestInfo request : capabalities.values()) { addResourceRequestToAsk(request.remoteRequest); } } } } // re register with RM registerApplicationMaster(); allocateResponse = allocate(progressIndicator); return allocateResponse; } synchronized (this) { // update these on successful RPC clusterNodeCount = allocateResponse.getNumClusterNodes(); lastResponseId = allocateResponse.getResponseId(); clusterAvailableResources = allocateResponse.getAvailableResources(); if (!allocateResponse.getNMTokens().isEmpty()) { populateNMTokens(allocateResponse.getNMTokens()); } if (allocateResponse.getAMRMToken() != null) { updateAMRMToken(allocateResponse.getAMRMToken()); } if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) { removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses()); } } } finally { // TODO how to differentiate remote yarn exception vs error in rpc if (allocateResponse == null) { // we hit an exception in allocate() // preserve ask and release for next call to allocate() synchronized (this) { release.addAll(releaseList); // requests could have been added or deleted during call to allocate // If requests were added/removed then there is nothing to do since // the ResourceRequest object in ask would have the actual new value. // If ask does not have this ResourceRequest then it was unchanged and // so we can add the value back safely. // This assumes that there will no concurrent calls to allocate() and // so we dont have to worry about ask being changed in the // synchronized block at the beginning of this method. for (ResourceRequest oldAsk : askList) { if (!ask.contains(oldAsk)) { ask.add(oldAsk); } } blacklistAdditions.addAll(blacklistToAdd); blacklistRemovals.addAll(blacklistToRemove); } } } return allocateResponse; }
From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java
License:Apache License
/** * Main run function for the application master * * @throws YarnException//from w w w. j av a 2 s . c om * @throws IOException */ @SuppressWarnings({ "unchecked" }) public boolean run() throws YarnException, IOException { yarnClient.start(); LOG.info("Starting ApplicationMaster"); Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); // Now remove the AM->RM token so that containers cannot access it. Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { iter.remove(); } } allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amRMClient = AMRMClient.createAMRMClient(); amRMClient.init(conf); amRMClient.start(); containerListener = createNMCallbackHandler(); nmClientAsync = new NMClientAsyncImpl(containerListener); nmClientAsync.init(conf); nmClientAsync.start(); // Register self with ResourceManager // This will start heartbeating to the RM appMasterHostname = NetUtils.getHostname(); RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); //TODO: Figure out how to do this. List<NodeReport> reports = this.yarnClient.getNodeReports(); LOG.info("Cluster Status"); List<Resource> availableResources = new ArrayList<Resource>(); for (NodeReport nr : reports) { LOG.info(" NodeId: " + nr.getNodeId() + " Capabilities " + nr.getCapability() + " Used Resources " + nr.getUsed()); int availableMem = nr.getCapability().getMemory() - nr.getUsed().getMemory(); int availableVCores = nr.getCapability().getVirtualCores() - nr.getUsed().getVirtualCores(); Resource resource = Resource.newInstance(availableMem, availableVCores); availableResources.add(resource); } /*Based on available resources scheduler should decide the best allocation of recommenders to resources and return a list of resources that should be requested from the ResourceManager*/ DataSetDescription datasetDesc = new DataSetDescription(this.setup.dataMetadataFile); List<RecommenderPool> recommenderPools = RecommenderScheduler.splitRecommenderPool(availableResources, recommenders, datasetDesc, this.setup.nShards); for (RecommenderPool res : recommenderPools) { ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(), requestPriority); LOG.info("CONTAINER ASK: " + containerAsk); amRMClient.addContainerRequest(containerAsk); } float progress = 0; List<RecommenderPool> pendingPools = new ArrayList<RecommenderPool>(); for (RecommenderPool p : recommenderPools) pendingPools.add(p); this.numTotalContainers = recommenderPools.size(); this.numCompletedContainers.set(0); this.numAllocatedContainers.set(0); while (numCompletedContainers.get() != numTotalContainers) { try { Thread.sleep(200); AllocateResponse allocResp = amRMClient.allocate(progress); List<Container> newContainers = allocResp.getAllocatedContainers(); List<ContainerStatus> completedContainers = allocResp.getCompletedContainersStatuses(); if (this.numAllocatedContainers.get() >= this.numTotalContainers && pendingPools.size() != 0) { //Ask for new containers for pending pools LOG.warn("The number of allocated containers has exceeded number of total containers, but " + "the pending pool size is still not 0. Asking for new containers from RM"); for (RecommenderPool res : pendingPools) { ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(), requestPriority); LOG.info("NEW CONTAINER ASK: " + containerAsk); amRMClient.addContainerRequest(containerAsk); } } if (newContainers.size() > 0) { LOG.info("Allocated " + newContainers.size() + " new containers"); numAllocatedContainers.addAndGet(newContainers.size()); for (Container container : newContainers) { //Find matching recommender pool from pendingRecommender pools. RecommenderPool pool = null; for (RecommenderPool p : pendingPools) { if (p.getTotalMemory() == container.getResource().getMemory()) { pool = p; break; } } if (pool == null) { LOG.warn("No Takers for Container " + container + " Releasing container"); amRMClient.releaseAssignedContainer(container.getId()); } else { startContainer(container, pool); //This pool has now got a container. Remove it from pending pools pendingPools.remove(pool); } } } onContainersCompleted(completedContainers); } catch (InterruptedException ex) { } } finish(); return success; }
From source file:eu.stratosphere.yarn.ApplicationMaster.java
License:Apache License
private void run() throws Exception { //Utils.logFilesInCurrentDirectory(LOG); // Initialize clients to ResourceManager and NodeManagers Configuration conf = Utils.initializeYarnConfiguration(); FileSystem fs = FileSystem.get(conf); Map<String, String> envs = System.getenv(); final String currDir = envs.get(Environment.PWD.key()); final String logDirs = envs.get(Environment.LOG_DIRS.key()); final String ownHostname = envs.get(Environment.NM_HOST.key()); final String appId = envs.get(Client.ENV_APP_ID); final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR); final String applicationMasterHost = envs.get(Environment.NM_HOST.key()); final String remoteStratosphereJarPath = envs.get(Client.STRATOSPHERE_JAR_PATH); final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES); final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME); final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT)); final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY)); final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES)); int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); if (currDir == null) { throw new RuntimeException("Current directory unknown"); }// w ww. j av a 2s .com if (ownHostname == null) { throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set."); } LOG.info("Working directory " + currDir); // load Stratosphere configuration. Utils.getStratosphereConfiguration(currDir); final String localWebInterfaceDir = currDir + "/resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME; // Update yaml conf -> set jobManager address to this machine's address. FileInputStream fis = new FileInputStream(currDir + "/stratosphere-conf.yaml"); BufferedReader br = new BufferedReader(new InputStreamReader(fis)); Writer output = new BufferedWriter(new FileWriter(currDir + "/stratosphere-conf-modified.yaml")); String line; while ((line = br.readLine()) != null) { if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) { output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) { output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n"); } else { output.append(line + "\n"); } } // just to make sure. output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n"); output.close(); br.close(); File newConf = new File(currDir + "/stratosphere-conf-modified.yaml"); if (!newConf.exists()) { LOG.warn("modified yaml does not exist!"); } Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME, ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath()); JobManager jm; { String pathToNepheleConfig = currDir + "/stratosphere-conf-modified.yaml"; String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig }; // start the job manager jm = JobManager.initialize(args); // Start info server for jobmanager jm.startInfoServer(); } AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); NMClient nmClient = NMClient.createNMClient(); nmClient.init(conf); nmClient.start(); // Register with ResourceManager LOG.info("registering ApplicationMaster"); rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":" + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined")); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource stratosphereJar = Records.newRecord(LocalResource.class); LocalResource stratosphereConf = Records.newRecord(LocalResource.class); // register Stratosphere Jar with remote HDFS final Path remoteJarPath = new Path(remoteStratosphereJarPath); Utils.registerLocalResource(fs, remoteJarPath, stratosphereJar); // register conf with local fs. Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/stratosphere-conf-modified.yaml"), stratosphereConf, new Path(clientHomeDir)); LOG.info("Prepared localresource for modified yaml: " + stratosphereConf); boolean hasLog4j = new File(currDir + "/log4j.properties").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.STRATOSPHERE_JVM_OPTIONS, ""); // Obtain allocated containers and launch int allocatedContainers = 0; int completedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLog4j) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } tmCommand += " eu.stratosphere.yarn.YarnTaskManagerRunner -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); // copy resources to the TaskManagers. Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("stratosphere.jar", stratosphereJar); localResources.put("stratosphere-conf.yaml", stratosphereConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } ctx.setLocalResources(localResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add stratosphere.jar to class path. containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container" + e.getMessage()); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(100); } // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(5000); } LOG.info("Shutting down JobManager"); jm.shutdown(); // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); }
From source file:org.apache.flink.yarn.ApplicationMaster.java
License:Apache License
private void run() throws Exception { //Utils.logFilesInCurrentDirectory(LOG); // Initialize clients to ResourceManager and NodeManagers Configuration conf = Utils.initializeYarnConfiguration(); FileSystem fs = FileSystem.get(conf); Map<String, String> envs = System.getenv(); final String currDir = envs.get(Environment.PWD.key()); final String logDirs = envs.get(Environment.LOG_DIRS.key()); final String ownHostname = envs.get(Environment.NM_HOST.key()); final String appId = envs.get(Client.ENV_APP_ID); final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR); final String applicationMasterHost = envs.get(Environment.NM_HOST.key()); final String remoteFlinkJarPath = envs.get(Client.FLINK_JAR_PATH); final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES); final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME); final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT)); final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY)); final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES)); int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); if (currDir == null) { throw new RuntimeException("Current directory unknown"); }//from ww w . j ava 2 s .c om if (ownHostname == null) { throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set."); } LOG.info("Working directory " + currDir); // load Flink configuration. Utils.getFlinkConfiguration(currDir); final String localWebInterfaceDir = currDir + "/resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME; // Update yaml conf -> set jobManager address to this machine's address. FileInputStream fis = new FileInputStream(currDir + "/flink-conf.yaml"); BufferedReader br = new BufferedReader(new InputStreamReader(fis)); Writer output = new BufferedWriter(new FileWriter(currDir + "/flink-conf-modified.yaml")); String line; while ((line = br.readLine()) != null) { if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) { output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) { output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n"); } else { output.append(line + "\n"); } } // just to make sure. output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n"); output.close(); br.close(); File newConf = new File(currDir + "/flink-conf-modified.yaml"); if (!newConf.exists()) { LOG.warn("modified yaml does not exist!"); } Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME, ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath()); JobManager jm; { String pathToNepheleConfig = currDir + "/flink-conf-modified.yaml"; String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig }; // start the job manager jm = JobManager.initialize(args); // Start info server for jobmanager jm.startInfoServer(); } AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); NMClient nmClient = NMClient.createNMClient(); nmClient.init(conf); nmClient.start(); // Register with ResourceManager LOG.info("registering ApplicationMaster"); rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":" + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined")); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource flinkJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); // register Flink Jar with remote HDFS final Path remoteJarPath = new Path(remoteFlinkJarPath); Utils.registerLocalResource(fs, remoteJarPath, flinkJar); // register conf with local fs. Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/flink-conf-modified.yaml"), flinkConf, new Path(clientHomeDir)); LOG.info("Prepared localresource for modified yaml: " + flinkConf); boolean hasLog4j = new File(currDir + "/log4j.properties").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); // Obtain allocated containers and launch int allocatedContainers = 0; int completedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLog4j) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } tmCommand += " org.apache.flink.yarn.YarnTaskManagerRunner -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); // copy resources to the TaskManagers. Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("flink.jar", flinkJar); localResources.put("flink-conf.yaml", flinkConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } ctx.setLocalResources(localResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add flink.jar to class path. containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container" + e.getMessage()); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(100); } // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(5000); } LOG.info("Shutting down JobManager"); jm.shutdown(); // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); }
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
private void run() throws Exception { heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); nmClient = NMClient.createNMClient(); nmClient.init(conf);/* w w w .j a va 2 s.c om*/ nmClient.start(); nmClient.cleanupRunningContainersOnStop(true); // Register with ResourceManager String url = "http://" + applicationMasterHost + ":" + jobManagerWebPort; LOG.info("Registering ApplicationMaster with tracking url " + url); rmClient.registerApplicationMaster(applicationMasterHost, 0, url); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource flinkJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); // register Flink Jar with remote HDFS final Path remoteJarPath = new Path(remoteFlinkJarPath); Utils.registerLocalResource(fs, remoteJarPath, flinkJar); // register conf with local fs. Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/flink-conf-modified.yaml"), flinkConf, new Path(clientHomeDir)); LOG.info("Prepared local resource for modified yaml: " + flinkConf); hasLogback = new File(currDir + "/logback.xml").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // copy resources to the TaskManagers. taskManagerLocalResources = new HashMap<String, LocalResource>(2); taskManagerLocalResources.put("flink.jar", flinkJar); taskManagerLocalResources.put("flink-conf.yaml", flinkConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { taskManagerLocalResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } completedContainers = 0; // Obtain allocated containers and launch StringBuffer containerDiag = new StringBuffer(); // diagnostics log for the containers. allocateOutstandingContainer(containerDiag); LOG.info("Allocated all initial containers"); // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); logDeadContainer(status, containerDiag); } Thread.sleep(5000); } if (isClosed) { return; } // Un-register with ResourceManager final String diagnosticsMessage = "Application Master shut down after all " + "containers finished\n" + containerDiag.toString(); LOG.info("Diagnostics message: " + diagnosticsMessage); rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, diagnosticsMessage, ""); this.close(); amRpcServer.stop(); // we need to manually stop the RPC service. Usually, the Client stops the RPC, // but at this point, the AM has been shut down (for some reason). LOG.info("Application Master shutdown completed."); }
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
/** * Run a Thread to allocate new containers until taskManagerCount * is correct again.// w w w . ja v a2 s. c om */ private void allocateOutstandingContainer(StringBuffer containerDiag) throws Exception { // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); int allocatedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLogback) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-logback" + ".log\" -Dlogback.configurationFile=file:logback.xml"; } tmCommand += " " + YarnTaskManagerRunner.class.getName() + " -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); ctx.setLocalResources(taskManagerLocalResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add flink.jar to class path. containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container", e); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); // status. logDeadContainer(status, containerDiag); } Thread.sleep(100); } }
From source file:org.apache.twill.internal.yarn.Hadoop21YarnAMClient.java
License:Apache License
@Override protected AllocateResult doAllocate(float progress) throws Exception { AllocateResponse allocateResponse = amrmClient.allocate(progress); List<RunnableProcessLauncher> launchers = Lists .newArrayListWithCapacity(allocateResponse.getAllocatedContainers().size()); for (Container container : allocateResponse.getAllocatedContainers()) { launchers.add(new RunnableProcessLauncher(new Hadoop21YarnContainerInfo(container), nmClient)); }/*from www . jav a 2 s. c o m*/ List<YarnContainerStatus> completed = ImmutableList .copyOf(Iterables.transform(allocateResponse.getCompletedContainersStatuses(), STATUS_TRANSFORM)); return new AllocateResult(launchers, completed); }