List of usage examples for org.apache.hadoop.security.UserGroupInformation.getCurrentUser()
@InterfaceAudience.Public @InterfaceStability.Evolving public static UserGroupInformation getCurrentUser() throws IOException
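Before the per-project examples below, here is a minimal self-contained sketch of the typical call pattern: obtain the current user, then run privileged work under its security context. The class name CurrentUserExample and the FileSystem lookup inside run() are illustrative assumptions, not taken from any of the source files listed below.

import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class CurrentUserExample {
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        UserGroupInformation.setConfiguration(conf);

        // getCurrentUser() returns the user of the current JVM security context:
        // the Kerberos subject in secure mode, the OS login user otherwise.
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        System.out.println("Current user: " + ugi.getShortUserName());

        // Common follow-up pattern (also used by the Flink examples below):
        // execute privileged work as that user via doAs().
        FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            @Override
            public FileSystem run() throws Exception {
                return FileSystem.get(conf);
            }
        });
        System.out.println("Home directory: " + fs.getHomeDirectory());
    }
}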
From source file:org.apache.flink.yarn.FlinkYarnClientBase.java
License:Apache License
@Override
public AbstractFlinkYarnCluster deploy() throws Exception {
    UserGroupInformation.setConfiguration(conf);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    if (UserGroupInformation.isSecurityEnabled()) {
        if (!ugi.hasKerberosCredentials()) {
            throw new YarnDeploymentException("In secure mode. Please provide Kerberos credentials in order to authenticate. "
                    + "You may use kinit to authenticate and request a TGT from the Kerberos server.");
        }
        return ugi.doAs(new PrivilegedExceptionAction<AbstractFlinkYarnCluster>() {
            @Override
            public AbstractFlinkYarnCluster run() throws Exception {
                return deployInternal();
            }
        });
    } else {
        return deployInternal();
    }
}
From source file:org.apache.flink.yarn.FlinkYarnClientBase.java
License:Apache License
/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguration ------
    Map<String, String> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default filesystem scheme from configuration.", e);
    }

    // ------------------ Check if the specified queue exists --------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. "
                        + "Available queues: " + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN cluster has the requested resources --------------

    // yarnMinAllocationMB specifies the smallest possible container allocation size;
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB + "'. Please increase the memory size. "
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb; maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager available!\n"
                + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException("The cluster does not have the requested resources for the TaskManagers available!\n"
                + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are "
            + "connecting from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);
    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount
                    + ") TaskManagers, the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java"
            + " -Xmx" + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\"";
        if (hasLogback) {
            amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
        }
        if (hasLog4j) {
            amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
        }
    }

    amCommand += " " + getApplicationMasterClass().getName()
            + " " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out"
            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // initialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because it's not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values. "
                + "The Flink YARN client needs to store its files in a distributed file system.");
    }

    // Set up the ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    final ApplicationId appId = appContext.getApplicationId();

    // Set up the jar for the ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();

    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);
        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Set up the CLASSPATH for the ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration));
    // set classpath from YARN configuration
    Utils.setupEnv(conf, appMasterEnv);
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for the ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report;
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
            case FAILED:
            case FINISHED:
            case KILLED:
                throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                        + " during deployment. \nDiagnostics from YARN: " + report.getDiagnostics() + "\n"
                        + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                        + "yarn logs -applicationId " + appId);
                // break ..
            case RUNNING:
                LOG.info("YARN application has been deployed successfully.");
                break loop;
            default:
                LOG.info("Deploying cluster, current state " + appState);
                if (waittime > 60000) {
                    LOG.info("Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
                }
        }
        waittime += 1000;
        Thread.sleep(1000);
    }

    // print the application id so the user can cancel it themselves.
    if (isDetached()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop it:\n"
                + "yarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directory will not be removed.");
    }

    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shutdown hook.
    }

    // the Flink cluster is deployed on YARN; return a handle that represents it
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}
From source file:org.apache.flink.yarn.YarnApplicationMasterRunner.java
License:Apache License
/**
 * The instance entry point for the YARN application master. Obtains user group
 * information and calls the main work method {@link #runApplicationMaster()} as a
 * privileged action.
 *
 * @param args The command line arguments.
 * @return The process exit code.
 */
protected int run(String[] args) {
    try {
        LOG.debug("All environment variables: {}", ENV);

        final String yarnClientUsername = ENV.get(YarnConfigKeys.ENV_CLIENT_USERNAME);
        require(yarnClientUsername != null, "YARN client user name environment variable {} not set",
                YarnConfigKeys.ENV_CLIENT_USERNAME);

        final UserGroupInformation currentUser;
        try {
            currentUser = UserGroupInformation.getCurrentUser();
        } catch (Throwable t) {
            throw new Exception("Cannot access UserGroupInformation information for current user", t);
        }

        LOG.info("YARN daemon runs as user {}. Running Flink Application Master/JobManager as user {}",
                currentUser.getShortUserName(), yarnClientUsername);

        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername);

        // transfer all security tokens, for example for authenticated HDFS and HBase access
        for (Token<?> token : currentUser.getTokens()) {
            ugi.addToken(token);
        }

        // run the actual work in a secured privileged action
        return ugi.doAs(new PrivilegedAction<Integer>() {
            @Override
            public Integer run() {
                return runApplicationMaster();
            }
        });
    } catch (Throwable t) {
        // make sure that everything ends up in the log
        LOG.error("YARN Application Master initialization failed", t);
        return INIT_ERROR_EXIT_CODE;
    }
}
From source file:org.apache.flink.yarn.YarnApplicationMasterRunner.java
License:Apache License
/**
 * Creates the launch context, which describes how to bring up a TaskManager process in
 * an allocated YARN container.
 *
 * <p>This code is extremely YARN specific and registers all the resources that the TaskManager
 * needs (such as JAR file, config file, ...) and all environment variables in a YARN
 * container launch context. The launch context then ensures that those resources will be
 * copied into the container's transient working directory.
 *
 * <p>We do this work before we start the ResourceManager actor in order to fail early if
 * any of the operations here fail.
 *
 * @param flinkConfig The Flink configuration object.
 * @param yarnConfig The YARN configuration object.
 * @param env The environment variables.
 * @param tmParams The TaskManager container memory parameters.
 * @param taskManagerConfig The configuration for the TaskManagers.
 * @param workingDirectory The current application master container's working directory.
 * @param taskManagerMainClass The class with the main method.
 * @param log The logger.
 *
 * @return The launch context for the TaskManager processes.
 *
 * @throws Exception Thrown if the launch context could not be created, for example if
 *                   the resources could not be copied.
 */
public static ContainerLaunchContext createTaskManagerContext(Configuration flinkConfig,
        YarnConfiguration yarnConfig, Map<String, String> env, ContaineredTaskManagerParameters tmParams,
        Configuration taskManagerConfig, String workingDirectory, Class<?> taskManagerMainClass, Logger log)
        throws Exception {

    log.info("Setting up resources for TaskManagers");

    // get and validate all relevant variables
    String remoteFlinkJarPath = env.get(YarnConfigKeys.FLINK_JAR_PATH);
    require(remoteFlinkJarPath != null, "Environment variable %s not set", YarnConfigKeys.FLINK_JAR_PATH);

    String appId = env.get(YarnConfigKeys.ENV_APP_ID);
    require(appId != null, "Environment variable %s not set", YarnConfigKeys.ENV_APP_ID);

    String clientHomeDir = env.get(YarnConfigKeys.ENV_CLIENT_HOME_DIR);
    require(clientHomeDir != null, "Environment variable %s not set", YarnConfigKeys.ENV_CLIENT_HOME_DIR);

    String shipListString = env.get(YarnConfigKeys.ENV_CLIENT_SHIP_FILES);
    require(shipListString != null, "Environment variable %s not set", YarnConfigKeys.ENV_CLIENT_SHIP_FILES);

    String yarnClientUsername = env.get(YarnConfigKeys.ENV_CLIENT_USERNAME);
    require(yarnClientUsername != null, "Environment variable %s not set", YarnConfigKeys.ENV_CLIENT_USERNAME);

    // obtain a handle to the file system used by YARN
    final org.apache.hadoop.fs.FileSystem yarnFileSystem;
    try {
        yarnFileSystem = org.apache.hadoop.fs.FileSystem.get(yarnConfig);
    } catch (IOException e) {
        throw new Exception("Could not access YARN's default file system", e);
    }

    // register the Flink jar with the remote HDFS
    LocalResource flinkJar = Records.newRecord(LocalResource.class);
    {
        Path remoteJarPath = new Path(remoteFlinkJarPath);
        Utils.registerLocalResource(yarnFileSystem, remoteJarPath, flinkJar);
    }

    // register the config with the local fs
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    {
        // write the TaskManager configuration to a local file
        final File taskManagerConfigFile = new File(workingDirectory, UUID.randomUUID() + "-taskmanager-conf.yaml");
        LOG.debug("Writing TaskManager configuration to {}", taskManagerConfigFile.getAbsolutePath());
        BootstrapTools.writeConfiguration(taskManagerConfig, taskManagerConfigFile);

        Utils.setupLocalResource(yarnFileSystem, appId, new Path(taskManagerConfigFile.toURI()), flinkConf,
                new Path(clientHomeDir));

        log.info("Prepared local resource for modified yaml: {}", flinkConf);
    }

    Map<String, LocalResource> taskManagerLocalResources = new HashMap<>();
    taskManagerLocalResources.put("flink.jar", flinkJar);
    taskManagerLocalResources.put("flink-conf.yaml", flinkConf);

    // prepare additional files to be shipped
    for (String pathStr : shipListString.split(",")) {
        if (!pathStr.isEmpty()) {
            LocalResource resource = Records.newRecord(LocalResource.class);
            Path path = new Path(pathStr);
            Utils.registerLocalResource(yarnFileSystem, path, resource);
            taskManagerLocalResources.put(path.getName(), resource);
        }
    }

    // now that all resources are prepared, we can create the launch context
    log.info("Creating container launch context for TaskManagers");

    boolean hasLogback = new File(workingDirectory, "logback.xml").exists();
    boolean hasLog4j = new File(workingDirectory, "log4j.properties").exists();

    String launchCommand = BootstrapTools.getTaskManagerShellCommand(flinkConfig, tmParams, ".",
            ApplicationConstants.LOG_DIR_EXPANSION_VAR, hasLogback, hasLog4j, taskManagerMainClass);

    log.info("Starting TaskManagers with command: " + launchCommand);

    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    ctx.setCommands(Collections.singletonList(launchCommand));
    ctx.setLocalResources(taskManagerLocalResources);

    Map<String, String> containerEnv = new HashMap<>();
    containerEnv.putAll(tmParams.taskManagerEnv());

    // add YARN classpath, etc. to the container environment
    Utils.setupEnv(yarnConfig, containerEnv);
    containerEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, yarnClientUsername);
    ctx.setEnvironment(containerEnv);

    try {
        UserGroupInformation user = UserGroupInformation.getCurrentUser();
        Credentials credentials = user.getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        ctx.setTokens(securityTokens);
    } catch (Throwable t) {
        log.error("Getting current user info failed when trying to launch the container", t);
    }

    return ctx;
}
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t",
            flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024",
            "-s", "3", // set the slots to 3 to check if the vCores are set properly!
            "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3",
            "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from the web interface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find the container id of the taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get current user", e);
        Assert.fail();
    }

    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send the "stop" command to the command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}
From source file:org.apache.flink.yarn.YarnTaskExecutorRunner.java
License:Apache License
/**
 * The instance entry point for the YARN task executor. Obtains user group
 * information and calls the main work method {@link #runTaskExecutor(org.apache.flink.configuration.Configuration)} as a
 * privileged action.
 *
 * @param args The command line arguments.
 * @return The process exit code.
 */
protected int run(String[] args) {
    try {
        LOG.debug("All environment variables: {}", ENV);

        final String yarnClientUsername = ENV.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);

        final String localDirs = ENV.get(Environment.LOCAL_DIRS.key());
        LOG.info("Current working/local Directory: {}", localDirs);

        final String currDir = ENV.get(Environment.PWD.key());
        LOG.info("Current working Directory: {}", currDir);

        final String remoteKeytabPath = ENV.get(YarnConfigKeys.KEYTAB_PATH);
        LOG.info("TM: remote keytab path obtained {}", remoteKeytabPath);

        final String remoteKeytabPrincipal = ENV.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
        LOG.info("TM: remote keytab principal obtained {}", remoteKeytabPrincipal);

        final Configuration configuration = GlobalConfiguration.loadConfiguration(currDir);
        FileSystem.setDefaultScheme(configuration);

        // configure local directory
        String flinkTempDirs = configuration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, null);
        if (flinkTempDirs == null) {
            LOG.info("Setting directories for temporary files: " + localDirs);
            configuration.setString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, localDirs);
        } else {
            LOG.info("Overriding YARN's temporary file directories with those "
                    + "specified in the Flink config: " + flinkTempDirs);
        }

        // tell akka to die in case of an error
        configuration.setBoolean(ConfigConstants.AKKA_JVM_EXIT_ON_FATAL_ERROR, true);

        String keytabPath = null;
        if (remoteKeytabPath != null) {
            File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
            keytabPath = f.getAbsolutePath();
            LOG.info("keytab path: {}", keytabPath);
        }

        UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();

        LOG.info("YARN daemon is running as: {} Yarn client user obtainer: {}",
                currentUser.getShortUserName(), yarnClientUsername);

        org.apache.hadoop.conf.Configuration hadoopConfiguration = null;

        // To support the YARN secure integration test scenario
        File krb5Conf = new File(currDir, Utils.KRB5_FILE_NAME);
        if (krb5Conf.exists() && krb5Conf.canRead()) {
            String krb5Path = krb5Conf.getAbsolutePath();
            LOG.info("KRB5 Conf: {}", krb5Path);
            hadoopConfiguration = new org.apache.hadoop.conf.Configuration();
            hadoopConfiguration.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
            hadoopConfiguration.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
        }

        SecurityUtils.SecurityConfiguration sc;
        if (hadoopConfiguration != null) {
            sc = new SecurityUtils.SecurityConfiguration(configuration, hadoopConfiguration);
        } else {
            sc = new SecurityUtils.SecurityConfiguration(configuration);
        }

        if (keytabPath != null && remoteKeytabPrincipal != null) {
            configuration.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, keytabPath);
            configuration.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, remoteKeytabPrincipal);
        }

        SecurityUtils.install(sc);

        return SecurityUtils.getInstalledContext().runSecured(new Callable<Integer>() {
            @Override
            public Integer call() throws Exception {
                return runTaskExecutor(configuration);
            }
        });
    } catch (Throwable t) {
        // make sure that everything ends up in the log
        LOG.error("YARN Application Master initialization failed", t);
        return INIT_ERROR_EXIT_CODE;
    }
}
From source file:org.apache.flink.yarn.YarnTaskManagerRunner.java
License:Apache License
public static void main(final String[] args) throws IOException {
    Map<String, String> envs = System.getenv();
    final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME);
    final String localDirs = envs.get(Environment.LOCAL_DIRS.key());

    // configure local directory
    final String[] newArgs = Arrays.copyOf(args, args.length + 2);
    newArgs[newArgs.length - 2] = "-" + TaskManager.ARG_CONF_DIR;
    newArgs[newArgs.length - 1] = localDirs;
    LOG.info("Setting log path " + localDirs);
    LOG.info("YARN daemon runs as '" + UserGroupInformation.getCurrentUser().getShortUserName()
            + "' setting user to execute Flink TaskManager to '" + yarnClientUsername + "'");
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername);
    for (Token<? extends TokenIdentifier> toks : UserGroupInformation.getCurrentUser().getTokens()) {
        ugi.addToken(toks);
    }
    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            try {
                TaskManager.main(newArgs);
            } catch (Exception e) {
                e.printStackTrace();
            }
            return null;
        }
    });
}
From source file:org.apache.flink.yarn.YarnTaskManagerRunnerFactory.java
License:Apache License
/**
 * Creates a {@link YarnTaskManagerRunnerFactory.Runner}.
 */
public static Runner create(String[] args, final Class<? extends YarnTaskManager> taskManager,
        Map<String, String> envs) throws IOException {

    EnvironmentInformation.logEnvironmentInfo(LOG, "YARN TaskManager", args);
    SignalHandler.register(LOG);
    JvmShutdownSafeguard.installAsShutdownHook(LOG);

    // try to parse the command line arguments
    final Configuration configuration;
    try {
        configuration = TaskManager.parseArgsAndLoadConfig(args);
    } catch (Throwable t) {
        LOG.error(t.getMessage(), t);
        System.exit(TaskManager.STARTUP_FAILURE_RETURN_CODE());
        return null;
    }

    // read the environment variables for YARN
    final String yarnClientUsername = envs.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);

    final String localDirs = envs.get(Environment.LOCAL_DIRS.key());
    LOG.info("Current working/local Directory: {}", localDirs);

    final String currDir = envs.get(Environment.PWD.key());
    LOG.info("Current working Directory: {}", currDir);

    final String remoteKeytabPrincipal = envs.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
    LOG.info("TM: remoteKeytabPrincipal obtained {}", remoteKeytabPrincipal);

    BootstrapTools.updateTmpDirectoriesInConfiguration(configuration, localDirs);

    // tell akka to die in case of an error
    configuration.setBoolean(AkkaOptions.JVM_EXIT_ON_FATAL_ERROR, true);

    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();

    LOG.info("YARN daemon is running as: {} Yarn client user obtainer: {}",
            currentUser.getShortUserName(), yarnClientUsername);

    // Infer the resource identifier from the environment variable
    String containerID = Preconditions.checkNotNull(envs.get(YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID));
    final ResourceID resourceId = new ResourceID(containerID);
    LOG.info("ResourceID assigned for this container: {}", resourceId);

    File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
    if (remoteKeytabPrincipal != null && f.exists()) {
        // set the keytab principal and replace the path with the local path of the shipped keytab file in the NodeManager
        configuration.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, f.getAbsolutePath());
        configuration.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, remoteKeytabPrincipal);
    }

    try {
        SecurityConfiguration sc = new SecurityConfiguration(configuration);
        SecurityUtils.install(sc);

        return new Runner(configuration, resourceId, taskManager);
    } catch (Exception e) {
        LOG.error("Exception occurred while building Task Manager runner", e);
        throw new RuntimeException(e);
    }
}
From source file:org.apache.flume.auth.SimpleAuthenticator.java
License:Apache License
@Override
public synchronized PrivilegedExecutor proxyAs(String proxyUserName) {
    if (proxyUserName == null || proxyUserName.isEmpty()) {
        return this;
    }
    if (proxyCache.get(proxyUserName) == null) {
        UserGroupInformation proxyUgi;
        try {
            proxyUgi = UserGroupInformation.createProxyUser(proxyUserName,
                    UserGroupInformation.getCurrentUser());
        } catch (IOException e) {
            throw new SecurityException("Unable to create proxy User", e);
        }
        proxyCache.put(proxyUserName, new UGIExecutor(proxyUgi));
    }
    return proxyCache.get(proxyUserName);
}
From source file:org.apache.giraph.yarn.GiraphApplicationMaster.java
License:Apache License
/**
 * Populate allTokens with the tokens received.
 *
 * @throws IOException if reading the current user's credentials fails
 */
private void getAllTokens() throws IOException {
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    // Remove the AM->RM token before serializing the credentials, so that
    // containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Token type :" + token.getKind());
        }
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
}