List of usage examples for org.apache.hadoop.yarn.api.records.Container.getNodeId()
/**
 * Accessor of the YARN {@code Container} record that yields the {@code NodeId} of the container.
 * Declared abstract, so the value is supplied by the concrete record implementation.
 * NOTE(review): presumably this identifies the node the container was allocated on - confirm
 * against the Hadoop YARN API documentation.
 *
 * @return the {@code NodeId} associated with this container
 */
@Public @Stable public abstract NodeId getNodeId();
From source file:org.apache.drill.yarn.appMaster.NodeInventory.java
License:Apache License
/**
 * Container-based convenience overload: looks up the host name of the node the
 * container reports and forwards it to the host-name variant of {@code reserve}.
 *
 * @param container YARN container whose node host is passed on
 */
public void reserve(Container container) {
  final String host = container.getNodeId().getHost();
  reserve(host);
}
From source file:org.apache.drill.yarn.appMaster.NodeInventory.java
License:Apache License
/**
 * Container-based convenience overload: looks up the host name of the node the
 * container reports and forwards it to the host-name variant of {@code release}.
 *
 * @param container YARN container whose node host is passed on
 */
public void release(Container container) {
  final String host = container.getNodeId().getHost();
  release(host);
}
From source file:org.apache.drill.yarn.core.DoYUtil.java
License:Apache License
/**
 * Builds a short bracketed label for a YARN container, suitable for log messages.
 * The label has the form {@code [id: <id>, host: <host>, priority: <priority>]}.
 *
 * @param container the container to label
 * @return the label text
 */
public static String labelContainer(Container container) {
  return "[id: " + container.getId()
      + ", host: " + container.getNodeId().getHost()
      + ", priority: " + container.getPriority()
      + "]";
}
From source file:org.apache.drill.yarn.core.DoYUtil.java
License:Apache License
/**
 * Renders the key facts of a YARN container as one bracketed string for log
 * messages: id, node host, priority, memory (reported in MB) and virtual cores.
 *
 * @param container the container to describe
 * @return text of the form
 *     {@code [id: <id>, host: <host>, priority: <priority>, memory: <n> MB, vcores: <n>]}
 */
public static String describeContainer(Container container) {
  return "[id: " + container.getId()
      + ", host: " + container.getNodeId().getHost()
      + ", priority: " + container.getPriority()
      + ", memory: " + container.getResource().getMemory()
      + " MB, vcores: " + container.getResource().getVirtualCores()
      + "]";
}
From source file:org.apache.flink.yarn.ApplicationMaster.java
License:Apache License
private void run() throws Exception { //Utils.logFilesInCurrentDirectory(LOG); // Initialize clients to ResourceManager and NodeManagers Configuration conf = Utils.initializeYarnConfiguration(); FileSystem fs = FileSystem.get(conf); Map<String, String> envs = System.getenv(); final String currDir = envs.get(Environment.PWD.key()); final String logDirs = envs.get(Environment.LOG_DIRS.key()); final String ownHostname = envs.get(Environment.NM_HOST.key()); final String appId = envs.get(Client.ENV_APP_ID); final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR); final String applicationMasterHost = envs.get(Environment.NM_HOST.key()); final String remoteFlinkJarPath = envs.get(Client.FLINK_JAR_PATH); final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES); final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME); final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT)); final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY)); final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES)); int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager); if (currDir == null) { throw new RuntimeException("Current directory unknown"); }//from w ww .java 2 s . c o m if (ownHostname == null) { throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set."); } LOG.info("Working directory " + currDir); // load Flink configuration. Utils.getFlinkConfiguration(currDir); final String localWebInterfaceDir = currDir + "/resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME; // Update yaml conf -> set jobManager address to this machine's address. 
FileInputStream fis = new FileInputStream(currDir + "/flink-conf.yaml"); BufferedReader br = new BufferedReader(new InputStreamReader(fis)); Writer output = new BufferedWriter(new FileWriter(currDir + "/flink-conf-modified.yaml")); String line; while ((line = br.readLine()) != null) { if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) { output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) { output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n"); } else { output.append(line + "\n"); } } // just to make sure. output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n"); output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n"); output.close(); br.close(); File newConf = new File(currDir + "/flink-conf-modified.yaml"); if (!newConf.exists()) { LOG.warn("modified yaml does not exist!"); } Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME, ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath()); JobManager jm; { String pathToNepheleConfig = currDir + "/flink-conf-modified.yaml"; String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig }; // start the job manager jm = JobManager.initialize(args); // Start info server for jobmanager jm.startInfoServer(); } AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient(); rmClient.init(conf); rmClient.start(); NMClient nmClient = NMClient.createNMClient(); nmClient.init(conf); nmClient.start(); // Register with ResourceManager LOG.info("registering ApplicationMaster"); rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":" + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, 
"undefined")); // Priority for worker containers - priorities are intra-application Priority priority = Records.newRecord(Priority.class); priority.setPriority(0); // Resource requirements for worker containers Resource capability = Records.newRecord(Resource.class); capability.setMemory(memoryPerTaskManager); capability.setVirtualCores(coresPerTaskManager); // Make container requests to ResourceManager for (int i = 0; i < taskManagerCount; ++i) { ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority); LOG.info("Requesting TaskManager container " + i); rmClient.addContainerRequest(containerAsk); } LocalResource flinkJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); // register Flink Jar with remote HDFS final Path remoteJarPath = new Path(remoteFlinkJarPath); Utils.registerLocalResource(fs, remoteJarPath, flinkJar); // register conf with local fs. Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/flink-conf-modified.yaml"), flinkConf, new Path(clientHomeDir)); LOG.info("Prepared localresource for modified yaml: " + flinkConf); boolean hasLog4j = new File(currDir + "/log4j.properties").exists(); // prepare the files to ship LocalResource[] remoteShipRsc = null; String[] remoteShipPaths = shipListString.split(","); if (!shipListString.isEmpty()) { remoteShipRsc = new LocalResource[remoteShipPaths.length]; { // scope for i int i = 0; for (String remoteShipPathStr : remoteShipPaths) { if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) { continue; } remoteShipRsc[i] = Records.newRecord(LocalResource.class); Path remoteShipPath = new Path(remoteShipPathStr); Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]); i++; } } } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); // Obtain allocated containers and launch int 
allocatedContainers = 0; int completedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLog4j) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } tmCommand += " org.apache.flink.yarn.YarnTaskManagerRunner -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); // copy resources to the TaskManagers. Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("flink.jar", flinkJar); localResources.put("flink-conf.yaml", flinkConf); // add ship resources if (!shipListString.isEmpty()) { Preconditions.checkNotNull(remoteShipRsc); for (int i = 0; i < remoteShipPaths.length; i++) { localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]); } } ctx.setLocalResources(localResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add flink.jar to class path. 
containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container" + e.getMessage()); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(100); } // Now wait for containers to complete while (completedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount); for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); } Thread.sleep(5000); } LOG.info("Shutting down JobManager"); jm.shutdown(); // Un-register with ResourceManager rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", ""); }
From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java
License:Apache License
/** * Run a Thread to allocate new containers until taskManagerCount * is correct again./*from w ww . ja v a 2 s . c o m*/ */ private void allocateOutstandingContainer(StringBuffer containerDiag) throws Exception { // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); int allocatedContainers = 0; while (allocatedContainers < taskManagerCount) { AllocateResponse response = rmClient.allocate(0); for (Container container : response.getAllocatedContainers()) { LOG.info("Got new Container for TM " + container.getId() + " on host " + container.getNodeId().getHost()); ++allocatedContainers; // Launch container by create ContainerLaunchContext ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts; if (hasLogback) { tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-logback" + ".log\" -Dlogback.configurationFile=file:logback.xml"; } tmCommand += " " + YarnTaskManagerRunner.class.getName() + " -configDir . " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log"; ctx.setCommands(Collections.singletonList(tmCommand)); LOG.info("Starting TM with command=" + tmCommand); ctx.setLocalResources(taskManagerLocalResources); // Setup CLASSPATH for Container (=TaskTracker) Map<String, String> containerEnv = new HashMap<String, String>(); Utils.setupEnv(conf, containerEnv); //add flink.jar to class path. 
containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername); ctx.setEnvironment(containerEnv); UserGroupInformation user = UserGroupInformation.getCurrentUser(); try { Credentials credentials = user.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ctx.setTokens(securityTokens); } catch (IOException e) { LOG.warn("Getting current user info failed when trying to launch the container", e); } LOG.info("Launching container " + allocatedContainers); nmClient.startContainer(container, ctx); } for (ContainerStatus status : response.getCompletedContainersStatuses()) { ++completedContainers; LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:" + completedContainers); LOG.info("Diagnostics " + status.getDiagnostics()); // status. logDeadContainer(status, containerDiag); } Thread.sleep(100); } }
From source file:org.apache.flink.yarn.YarnFlinkResourceManager.java
License:Apache License
private void releaseYarnContainer(Container container) { LOG.info("Releasing YARN container {}", container.getId()); containersBeingReturned.put(container.getId(), container); // release the container on the node manager try {//from w w w .j a v a 2 s. c o m nodeManagerClient.stopContainer(container.getId(), container.getNodeId()); } catch (Throwable t) { // we only log this error. since the ResourceManager also gets the release // notification, the container should be eventually cleaned up LOG.error("Error while calling YARN Node Manager to release container", t); } // tell the master that the container is no longer needed resourceManagerClient.releaseAssignedContainer(container.getId()); }
From source file:org.apache.flink.yarn.YarnFlinkResourceManager.java
License:Apache License
private void containersAllocated(List<Container> containers) { final int numRequired = getDesignatedWorkerPoolSize(); final int numRegistered = getNumberOfRegisteredTaskManagers(); for (Container container : containers) { numPendingContainerRequests = Math.max(0, numPendingContainerRequests - 1); LOG.info("Received new container: {} - Remaining pending container requests: {}", container.getId(), numPendingContainerRequests); // decide whether to return the container, or whether to start a TaskManager if (numRegistered + containersInLaunch.size() < numRequired) { // start a TaskManager final ResourceID containerIdString = new ResourceID(container.getId().toString()); final long now = System.currentTimeMillis(); containersInLaunch.put(containerIdString, new YarnContainerInLaunch(container, now)); String message = "Launching TaskManager in container " + containerIdString + " on host " + container.getNodeId().getHost(); LOG.info(message);//from w w w . jav a2 s . c o m sendInfoMessage(message); try { nodeManagerClient.startContainer(container, taskManagerLaunchContext); } catch (Throwable t) { // failed to launch the container containersInLaunch.remove(containerIdString); // return container, a new one will be requested eventually LOG.error("Could not start TaskManager in container " + containerIdString, t); containersBeingReturned.put(container.getId(), container); resourceManagerClient.releaseAssignedContainer(container.getId()); } } else { // return excessive container LOG.info("Returning excess container {}", container.getId()); containersBeingReturned.put(container.getId(), container); resourceManagerClient.releaseAssignedContainer(container.getId()); } } updateProgress(); // if we are waiting for no further containers, we can go to the // regular heartbeat interval if (numPendingContainerRequests <= 0) { resourceManagerClient.setHeartbeatInterval(yarnHeartbeatIntervalMillis); } // make sure we re-check the status of workers / containers one more time at least, // 
in case some containers did not come up properly triggerCheckWorkers(); }
From source file:org.apache.flink.yarn.YarnFlinkResourceManagerTest.java
License:Apache License
/**
 * Checks that the resource manager still reports {@code numInitialTaskManagers} registered
 * resources after the leader has been withdrawn ({@code notifyListener(null, null)}) and the
 * same leader has been announced again.
 *
 * <p>The YARN clients are Mockito mocks: every container request is answered with one of the
 * prepared mock containers, and every container start is answered with a
 * {@code NotifyResourceStarted} message sent to the resource manager.
 */
@Test public void testYarnFlinkResourceManagerJobManagerLostLeadership() throws Exception {
    new JavaTestKit(system) {
        {
            // Instance initializer of the anonymous JavaTestKit subclass: the whole test body
            // runs here, which is why expectMsgClass/getRef can be called unqualified.
            final Deadline deadline = new FiniteDuration(3, TimeUnit.MINUTES).fromNow();
            Configuration flinkConfig = new Configuration();
            YarnConfiguration yarnConfig = new YarnConfiguration();
            SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(null, null);
            String applicationMasterHostName = "localhost";
            String webInterfaceURL = "foobar";
            ContaineredTaskManagerParameters taskManagerParameters = new ContaineredTaskManagerParameters(1L, 1L, 1L, 1, new HashMap<String, String>());
            ContainerLaunchContext taskManagerLaunchContext = mock(ContainerLaunchContext.class);
            int yarnHeartbeatIntervalMillis = 1000;
            int maxFailedContainers = 10;
            int numInitialTaskManagers = 5;
            final YarnResourceManagerCallbackHandler callbackHandler = new YarnResourceManagerCallbackHandler();
            AMRMClientAsync<AMRMClient.ContainerRequest> resourceManagerClient = mock(AMRMClientAsync.class);
            NMClient nodeManagerClient = mock(NMClient.class);
            UUID leaderSessionID = UUID.randomUUID();
            // One mock container per initial TaskManager; ids differ by the loop index,
            // all of them report the node "container":1234.
            final List<Container> containerList = new ArrayList<>();
            for (int i = 0; i < numInitialTaskManagers; i++) {
                Container mockContainer = mock(Container.class);
                when(mockContainer.getId()).thenReturn(ContainerId.newInstance(ApplicationAttemptId
                        .newInstance(ApplicationId.newInstance(System.currentTimeMillis(), 1), 1), i));
                when(mockContainer.getNodeId()).thenReturn(NodeId.newInstance("container", 1234));
                containerList.add(mockContainer);
            }
            // Each addContainerRequest on the mocked RM client hands the next prepared
            // container to the callback handler, until the list is used up.
            doAnswer(new Answer() {
                int counter = 0;
                @Override public Object answer(InvocationOnMock invocation) throws Throwable {
                    if (counter < containerList.size()) {
                        callbackHandler
                                .onContainersAllocated(Collections.singletonList(containerList.get(counter++)));
                    }
                    return null;
                }
            }).when(resourceManagerClient).addContainerRequest(Matchers.any(AMRMClient.ContainerRequest.class));
            // The gateways only exist once the actors are created further down, so the
            // startContainer answer below is chained onto these two futures.
            final CompletableFuture<AkkaActorGateway>
                    resourceManagerFuture = new CompletableFuture<>();
            final CompletableFuture<AkkaActorGateway> leaderGatewayFuture = new CompletableFuture<>();
            // Each startContainer on the mocked NM client results in a NotifyResourceStarted
            // for that container being told to the resource manager gateway (with the leader
            // gateway as second argument) once both futures are complete.
            doAnswer((InvocationOnMock invocation) -> {
                Container container = (Container) invocation.getArguments()[0];
                resourceManagerFuture.thenCombine(leaderGatewayFuture, (resourceManagerGateway, leaderGateway) -> {
                    resourceManagerGateway.tell(
                            new NotifyResourceStarted(
                                    YarnFlinkResourceManager.extractResourceID(container)),
                            leaderGateway);
                    return null;
                });
                return null;
            }).when(nodeManagerClient).startContainer(Matchers.any(Container.class), Matchers.any(ContainerLaunchContext.class));
            ActorRef resourceManager = null;
            ActorRef leader1;
            try {
                // The leader is a forwarding actor created with this test kit's ref.
                leader1 = system.actorOf(Props.create(TestingUtils.ForwardingActor.class, getRef(), Option.apply(leaderSessionID)));
                resourceManager = system.actorOf(Props.create(TestingYarnFlinkResourceManager.class, flinkConfig, yarnConfig, leaderRetrievalService, applicationMasterHostName, webInterfaceURL, taskManagerParameters, taskManagerLaunchContext, yarnHeartbeatIntervalMillis, maxFailedContainers, numInitialTaskManagers, callbackHandler, resourceManagerClient, nodeManagerClient));
                // First announcement of the leader.
                leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
                final AkkaActorGateway leader1Gateway = new AkkaActorGateway(leader1, leaderSessionID);
                final AkkaActorGateway resourceManagerGateway = new AkkaActorGateway(resourceManager, leaderSessionID);
                // Completing both futures releases the startContainer answers chained above.
                leaderGatewayFuture.complete(leader1Gateway);
                resourceManagerFuture.complete(resourceManagerGateway);
                expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
                resourceManagerGateway
                        .tell(new RegisterResourceManagerSuccessful(leader1, Collections.emptyList()));
                // One Acknowledge is expected per container.
                for (int i = 0; i < containerList.size(); i++) {
                    expectMsgClass(deadline.timeLeft(), Acknowledge.class);
                }
                // Block until the resource manager answers the request for
                // numInitialTaskManagers registered resources.
                Future<Object> taskManagerRegisteredFuture = resourceManagerGateway
                        .ask(new NotifyWhenResourcesRegistered(numInitialTaskManagers), deadline.timeLeft());
                Await.ready(taskManagerRegisteredFuture, deadline.timeLeft());
                // Withdraw the leader, then announce the same leader again.
                leaderRetrievalService.notifyListener(null, null);
                leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
                expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
                resourceManagerGateway
                        .tell(new RegisterResourceManagerSuccessful(leader1, Collections.emptyList()));
                // Report every container as started once more after the re-registration.
                for (Container container : containerList) {
                    resourceManagerGateway.tell(
                            new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)),
                            leader1Gateway);
                }
                for (int i = 0; i < containerList.size(); i++) {
                    expectMsgClass(deadline.timeLeft(), Acknowledge.class);
                }
                // Final check: the number of registered resources equals the initial count.
                Future<Object> numberOfRegisteredResourcesFuture = resourceManagerGateway
                        .ask(RequestNumberOfRegisteredResources.INSTANCE, deadline.timeLeft());
                int numberOfRegisteredResources = (Integer) Await.result(numberOfRegisteredResourcesFuture, deadline.timeLeft());
                assertEquals(numInitialTaskManagers, numberOfRegisteredResources);
            } finally {
                // Stop the resource manager actor if it was created.
                if (resourceManager != null) {
                    resourceManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
                }
            }
        }
    };
}
From source file:org.apache.flink.yarn.YarnResourceManager.java
License:Apache License
@Override public void onContainersAllocated(List<Container> containers) { for (Container container : containers) { numPendingContainerRequests = Math.max(0, numPendingContainerRequests - 1); LOG.info("Received new container: {} - Remaining pending container requests: {}", container.getId(), numPendingContainerRequests); try {//from w w w .j a v a 2 s. c o m /** Context information used to start a TaskExecutor Java process */ ContainerLaunchContext taskExecutorLaunchContext = createTaskExecutorLaunchContext( container.getResource(), container.getId().toString(), container.getNodeId().getHost()); nodeManagerClient.startContainer(container, taskExecutorLaunchContext); } catch (Throwable t) { // failed to launch the container, will release the failed one and ask for a new one LOG.error("Could not start TaskManager in container {},", container, t); resourceManagerClient.releaseAssignedContainer(container.getId()); requestYarnContainer(container.getResource(), container.getPriority()); } } if (numPendingContainerRequests <= 0) { resourceManagerClient.setHeartbeatInterval(yarnHeartbeatIntervalMillis); } }