List of usage examples for org.apache.hadoop.yarn.api.records ApplicationId toString
@Override
public String toString()
From source file:org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java
License:Apache License
@Override public void thawApp(final String appId) throws YarnException, IOException, InterruptedException { ApplicationId applicationId = invokeSliderClientRunnable(new SliderClientContextRunnable<ApplicationId>() { @Override//w w w. j av a2s . c o m public ApplicationId run(SliderClient sliderClient) throws YarnException, IOException, InterruptedException { Set<String> properties = new HashSet<String>(); properties.add("id"); properties.add("name"); final SliderApp sliderApp = getSliderApp(appId, properties); if (sliderApp == null) throw new ApplicationNotFoundException(appId); ActionThawArgs thawArgs = new ActionThawArgs(); sliderClient.actionThaw(sliderApp.getName(), thawArgs); return sliderClient.applicationId; } }); logger.info("Thawed Slider App [" + appId + "] with response: " + applicationId.toString()); }
From source file:org.apache.drill.yarn.client.KillCommand.java
License:Apache License
/**
 * Sends a kill request for the application resolved by {@code checkAppId()} and then
 * blocks until the resource manager client reports completion.
 *
 * <p>Exits the JVM with status -1 when no application id is available.
 *
 * @throws ClientException if the kill request or the wait for completion fails
 */
@Override
public void run() throws ClientException {
  final ApplicationId appId = checkAppId();
  if (appId == null) {
    System.exit(-1);
  }

  final YarnRMClient rmClient = new YarnRMClient(appId);

  // Step 1: ask YARN to kill the application.
  try {
    rmClient.killApplication();
  } catch (YarnClientException killFailure) {
    throw new ClientException(killFailure);
  }
  System.out.println("Kill request sent, waiting for shut-down.");

  // Step 2: wait for the application to actually go away.
  try {
    rmClient.waitForCompletion();
  } catch (YarnClientException waitFailure) {
    throw new ClientException("Wait for completion failed for app id: " + appId, waitFailure);
  }
  System.out.println("Application completed: " + appId);
}
From source file:org.apache.drill.yarn.client.StartCommand.java
License:Apache License
/** * Check if an application ID file exists. If it does, check if an application * is running. If an app is running, then we can't start a new one. If the app * is not running, then clean up the "orphan" app id file. * * @throws ClientException//from w ww . j a v a 2s .c o m */ private void checkExistingApp() throws ClientException { File appIdFile = getAppIdFile(); if (!appIdFile.exists()) { return; } // File exists. Ask YARN about status. Reporter reporter; ApplicationId appId; try { System.out.println("Found app ID file: " + appIdFile.getAbsolutePath()); appId = checkAppId(); System.out.print("Checking application ID: " + appId.toString() + "..."); YarnRMClient client = new YarnRMClient(appId); reporter = new Reporter(client); reporter.getReport(); } catch (ClientException e) { // This exception occurs when we ask for a report about an application // that // YARN does not know about. (YARN has likely been restarted.) System.out.println(" Not running."); appIdFile.delete(); return; } // YARN knows about the application. But, was it stopped, perhaps from the // web UI? if (reporter.isStopped()) { System.out.println(" Completed with state " + reporter.getState()); appIdFile.delete(); return; } // The app (or another one with the same App ID) is running. System.out.println(" Still running!"); throw new ClientException("Error: AM already running as Application ID: " + appId); }
From source file:org.apache.flink.yarn.AbstractYarnClusterDescriptor.java
License:Apache License
public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient, YarnClientApplication yarnApplication) throws Exception { // ------------------ Set default file system scheme ------------------------- try {//w w w . j a va 2 s. co m org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration); } catch (IOException e) { throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e); } // initialize file system // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values." 
+ "The Flink YARN client needs to store its files in a distributed file system"); } ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size()); for (File file : shipFiles) { effectiveShipFiles.add(file.getAbsoluteFile()); } //check if there is a logback or log4j file File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME); final boolean hasLogback = logbackFile.exists(); if (hasLogback) { effectiveShipFiles.add(logbackFile); } File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME); final boolean hasLog4j = log4jFile.exists(); if (hasLog4j) { effectiveShipFiles.add(log4jFile); if (hasLogback) { // this means there is already a logback configuration file --> fail LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and " + "Logback configuration files. Please delete or rename one of them."); } } addLibFolderToShipFiles(effectiveShipFiles); // add the user jar to the classpath of the to-be-created cluster if (userJarFiles != null) { effectiveShipFiles.addAll(userJarFiles); } // Set-up ApplicationSubmissionContext for the application final ApplicationId appId = appContext.getApplicationId(); // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------ String zkNamespace = getZookeeperNamespace(); // no user specified cli argument for namespace? if (zkNamespace == null || zkNamespace.isEmpty()) { // namespace defined in config? else use applicationId as default. 
zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID, String.valueOf(appId)); setZookeeperNamespace(zkNamespace); } flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace); if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) { // activate re-execution of failed applications appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); activateHighAvailabilitySupport(appContext); } else { // set number of application retries to 1 in the default case appContext .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); } // local resource map for Yarn final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size()); // list of remote paths (after upload) final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size()); // classpath assembler final StringBuilder classPathBuilder = new StringBuilder(); // ship list that enables reuse of resources for task manager containers StringBuilder envShipFileList = new StringBuilder(); // upload and register ship files for (File shipFile : effectiveShipFiles) { LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); paths.add(remotePath); localResources.put(shipFile.getName(), shipResources); if (shipFile.isDirectory()) { // add directories to the classpath java.nio.file.Path shipPath = shipFile.toPath(); final java.nio.file.Path parentPath = shipPath.getParent(); Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() { @Override public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs) throws IOException { super.preVisitDirectory(dir, 
attrs); java.nio.file.Path relativePath = parentPath.relativize(dir); classPathBuilder.append(relativePath).append(File.separator).append("*") .append(File.pathSeparator); return FileVisitResult.CONTINUE; } }); } else { // add files to the classpath classPathBuilder.append(shipFile.getName()).append(File.pathSeparator); } envShipFileList.append(remotePath).append(","); } // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); paths.add(remotePathJar); classPathBuilder.append("flink.jar").append(File.pathSeparator); paths.add(remotePathConf); classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator); // write job graph to tmp file and add it to local resource // TODO: server use user main method to generate job graph if (jobGraph != null) { try { File fp = File.createTempFile(appId.toString(), null); fp.deleteOnExit(); try (FileOutputStream output = new FileOutputStream(fp); ObjectOutputStream obOutput = new ObjectOutputStream(output);) { obOutput.writeObject(jobGraph); } LocalResource jobgraph = Records.newRecord(LocalResource.class); Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph, fs.getHomeDirectory()); localResources.put("job.graph", jobgraph); paths.add(remoteJobGraph); classPathBuilder.append("job.graph").append(File.pathSeparator); } catch (Exception e) { LOG.warn("Add job graph to local resource fail"); throw e; } } sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new 
FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. //To support Yarn Secure Integration Test Scenario //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML //and KRB5 configuration files. We are adding these files as container local resources for the container //applications (JM/TMs) to have proper secure cluster setup Path remoteKrb5Path = null; Path remoteYarnSiteXmlPath = null; boolean hasKrb5 = false; if (System.getenv("IN_TESTS") != null) { String krb5Config = System.getProperty("java.security.krb5.conf"); if (krb5Config != null && krb5Config.length() != 0) { File krb5 = new File(krb5Config); LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath()); LocalResource krb5ConfResource = Records.newRecord(LocalResource.class); Path krb5ConfPath = new Path(krb5.getAbsolutePath()); remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource, fs.getHomeDirectory()); localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource); File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME); LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath()); LocalResource yarnConfResource = Records.newRecord(LocalResource.class); Path yarnSitePath = new Path(f.getAbsolutePath()); remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath, yarnConfResource, fs.getHomeDirectory()); localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource); hasKrb5 = true; } } // setup security tokens LocalResource keytabResource = null; Path remotePathKeytab = null; String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB); if (keytab != null) { LOG.info("Adding keytab {} to the AM container local resource bucket", keytab); keytabResource = 
Records.newRecord(LocalResource.class); Path keytabPath = new Path(keytab); remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource, fs.getHomeDirectory()); localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource); } final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5); if (UserGroupInformation.isSecurityEnabled() && keytab == null) { //set tokens only when keytab is not provided LOG.info("Adding delegation token to the AM container.."); Utils.setTokensFor(amContainer, paths, conf); } amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH and environment variables for ApplicationMaster final Map<String, String> appMasterEnv = new HashMap<>(); // set user specified app master environment variables appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration)); // set Flink app class path appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString()); // set Flink on YARN internal configuration values appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached)); appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace()); // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name 
appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName()); if (keytabResource != null) { appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString()); String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL); appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal); } //To support Yarn Secure Integration Test Scenario if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) { appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString()); appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString()); } if (dynamicPropertiesEncoded != null) { appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } // set classpath from YARN configuration Utils.setupYarnClassPath(conf, appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { appContext.setQueue(yarnQueue); } setApplicationTags(appContext); // add a hook to clean up in case deployment fails Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication); Runtime.getRuntime().addShutdownHook(deploymentFailureHook); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); final long startTime = System.currentTimeMillis(); ApplicationReport report; YarnApplicationState lastAppState = 
YarnApplicationState.NEW; loop: while (true) { try { report = yarnClient.getApplicationReport(appId); } catch (IOException e) { throw new YarnDeploymentException("Failed to deploy the cluster.", e); } YarnApplicationState appState = report.getYarnApplicationState(); LOG.debug("Application State: {}", appState); switch (appState) { case FAILED: case FINISHED: //TODO: the finished state may be valid in flip-6 case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: if (appState != lastAppState) { LOG.info("Deploying cluster, current state " + appState); } if (System.currentTimeMillis() - startTime > 60000) { LOG.info( "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster"); } } lastAppState = appState; Thread.sleep(250); } // print the application id for user to cancel themselves. if (isDetachedMode()) { LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed."); } // since deployment was successful, remove the hook try { Runtime.getRuntime().removeShutdownHook(deploymentFailureHook); } catch (IllegalStateException e) { // we're already in the shut down hook. } return report; }
From source file:org.apache.flink.yarn.Client.java
License:Apache License
public void run(String[] args) throws Exception { if (UserGroupInformation.isSecurityEnabled()) { throw new RuntimeException("Flink YARN client does not have security support right now." + "File a bug, we will fix it asap"); }/* w w w . j av a 2 s .com*/ //Utils.logFilesInCurrentDirectory(LOG); // // Command Line Options // Options options = new Options(); options.addOption(VERBOSE); options.addOption(FLINK_CONF_DIR); options.addOption(FLINK_JAR); options.addOption(JM_MEMORY); options.addOption(TM_MEMORY); options.addOption(TM_CORES); options.addOption(CONTAINER); options.addOption(GEN_CONF); options.addOption(QUEUE); options.addOption(QUERY); options.addOption(SHIP_PATH); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (MissingOptionException moe) { System.out.println(moe.getMessage()); printUsage(); System.exit(1); } if (System.getProperty("log4j.configuration") == null) { Logger root = Logger.getRootLogger(); root.removeAllAppenders(); PatternLayout layout = new PatternLayout("%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n"); ConsoleAppender appender = new ConsoleAppender(layout, "System.err"); root.addAppender(appender); if (cmd.hasOption(VERBOSE.getOpt())) { root.setLevel(Level.DEBUG); LOG.debug("CLASSPATH: " + System.getProperty("java.class.path")); } else { root.setLevel(Level.INFO); } } // Jar Path Path localJarPath; if (cmd.hasOption(FLINK_JAR.getOpt())) { String userPath = cmd.getOptionValue(FLINK_JAR.getOpt()); if (!userPath.startsWith("file://")) { userPath = "file://" + userPath; } localJarPath = new Path(userPath); } else { localJarPath = new Path( "file://" + Client.class.getProtectionDomain().getCodeSource().getLocation().getPath()); } if (cmd.hasOption(GEN_CONF.getOpt())) { LOG.info("Placing default configuration in current directory"); File outFile = generateDefaultConf(localJarPath); LOG.info("File written to " + outFile.getAbsolutePath()); System.exit(0); } // Conf Path Path confPath 
= null; String confDirPath = ""; if (cmd.hasOption(FLINK_CONF_DIR.getOpt())) { confDirPath = cmd.getOptionValue(FLINK_CONF_DIR.getOpt()) + "/"; File confFile = new File(confDirPath + CONFIG_FILE_NAME); if (!confFile.exists()) { LOG.fatal("Unable to locate configuration file in " + confFile); System.exit(1); } confPath = new Path(confFile.getAbsolutePath()); } else { System.out.println("No configuration file has been specified"); // no configuration path given. // -> see if there is one in the current directory File currDir = new File("."); File[] candidates = currDir.listFiles(new FilenameFilter() { @Override public boolean accept(final File dir, final String name) { return name != null && name.endsWith(".yaml"); } }); if (candidates == null || candidates.length == 0) { System.out.println( "No configuration file has been found in current directory.\n" + "Copying default."); File outFile = generateDefaultConf(localJarPath); confPath = new Path(outFile.toURI()); } else { if (candidates.length > 1) { System.out.println("Multiple .yaml configuration files were found in the current directory\n" + "Please specify one explicitly"); System.exit(1); } else if (candidates.length == 1) { confPath = new Path(candidates[0].toURI()); } } } List<File> shipFiles = new ArrayList<File>(); // path to directory to ship if (cmd.hasOption(SHIP_PATH.getOpt())) { String shipPath = cmd.getOptionValue(SHIP_PATH.getOpt()); File shipDir = new File(shipPath); if (shipDir.isDirectory()) { shipFiles = new ArrayList<File>(Arrays.asList(shipDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return !(name.equals(".") || name.equals("..")); } }))); } else { LOG.warn("Ship directory is not a directory!"); } } boolean hasLog4j = false; //check if there is a log4j file if (confDirPath.length() > 0) { File l4j = new File(confDirPath + "/log4j.properties"); if (l4j.exists()) { shipFiles.add(l4j); hasLog4j = true; } } // queue String queue = "default"; if 
(cmd.hasOption(QUEUE.getOpt())) { queue = cmd.getOptionValue(QUEUE.getOpt()); } // JobManager Memory int jmMemory = 512; if (cmd.hasOption(JM_MEMORY.getOpt())) { jmMemory = Integer.valueOf(cmd.getOptionValue(JM_MEMORY.getOpt())); } // Task Managers memory int tmMemory = 1024; if (cmd.hasOption(TM_MEMORY.getOpt())) { tmMemory = Integer.valueOf(cmd.getOptionValue(TM_MEMORY.getOpt())); } // Task Managers vcores int tmCores = 1; if (cmd.hasOption(TM_CORES.getOpt())) { tmCores = Integer.valueOf(cmd.getOptionValue(TM_CORES.getOpt())); } Utils.getFlinkConfiguration(confPath.toUri().getPath()); int jmPort = GlobalConfiguration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 0); if (jmPort == 0) { LOG.warn("Unable to find job manager port in configuration!"); jmPort = ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT; } conf = Utils.initializeYarnConfiguration(); // intialize HDFS LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); if (fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values." 
+ "The Flink YARN client needs to store its files in a distributed file system"); } // Create yarnClient final YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Query cluster for metrics if (cmd.hasOption(QUERY.getOpt())) { showClusterMetrics(yarnClient); } if (!cmd.hasOption(CONTAINER.getOpt())) { LOG.fatal("Missing required argument " + CONTAINER.getOpt()); printUsage(); yarnClient.stop(); System.exit(1); } // TM Count final int taskManagerCount = Integer.valueOf(cmd.getOptionValue(CONTAINER.getOpt())); System.out.println("Using values:"); System.out.println("\tContainer Count = " + taskManagerCount); System.out.println("\tJar Path = " + localJarPath.toUri().getPath()); System.out.println("\tConfiguration file = " + confPath.toUri().getPath()); System.out.println("\tJobManager memory = " + jmMemory); System.out.println("\tTaskManager memory = " + tmMemory); System.out.println("\tTaskManager cores = " + tmCores); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); Resource maxRes = appResponse.getMaximumResourceCapability(); if (tmMemory > maxRes.getMemory() || tmCores > maxRes.getVirtualCores()) { LOG.fatal("The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + ", Maximum Cores: " + tmCores); yarnClient.stop(); System.exit(1); } if (jmMemory > maxRes.getMemory()) { LOG.fatal("The cluster does not have the requested resources for the JobManager available!\n" + "Maximum Memory: " + maxRes.getMemory()); yarnClient.stop(); System.exit(1); } int totalMemoryRequired = jmMemory + tmMemory * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.fatal("This YARN session requires " + totalMemoryRequired + "MB of memory 
in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available."); yarnClient.stop(); System.exit(1); } if (tmMemory > freeClusterMem.containerLimit) { LOG.fatal("The requested amount of memory for the TaskManagers (" + tmMemory + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit); yarnClient.stop(); System.exit(1); } if (jmMemory > freeClusterMem.containerLimit) { LOG.fatal("The requested amount of memory for the JobManager (" + jmMemory + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit); yarnClient.stop(); System.exit(1); } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jmMemory) + "M " + javaOpts; if (hasLog4j) { amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } amCommand += " org.apache.flink.yarn.ApplicationMaster" + " " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log"; amContainer.setCommands(Collections.singletonList(amCommand)); System.err.println("amCommand=" + amCommand); // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); final ApplicationId appId = appContext.getApplicationId(); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), 
localJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), confPath, flinkConf, fs.getHomeDirectory()); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); // setup security tokens (code from apache storm) final Path[] paths = new Path[3 + shipFiles.size()]; StringBuffer envShipFileList = new StringBuffer(); // upload ship files for (int i = 0; i < shipFiles.size(); i++) { File shipFile = shipFiles.get(i); LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); paths[3 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); localResources.put(shipFile.getName(), shipResources); envShipFileList.append(paths[3 + i]); if (i + 1 < shipFiles.size()) { envShipFileList.append(','); } } paths[0] = remotePathJar; paths[1] = remotePathConf; paths[2] = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL); fs.setPermission(paths[2], permission); // set permission for path. 
Utils.setTokensFor(amContainer, paths, this.conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); Utils.setupEnv(conf, appMasterEnv); // set configuration values appMasterEnv.put(Client.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(Client.ENV_TM_CORES, String.valueOf(tmCores)); appMasterEnv.put(Client.ENV_TM_MEMORY, String.valueOf(tmMemory)); appMasterEnv.put(Client.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(Client.ENV_APP_ID, appId.toString()); appMasterEnv.put(Client.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(Client.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(Client.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jmMemory); capability.setVirtualCores(1); appContext.setApplicationName("Flink"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue(queue); // file that we write into the conf/ dir containing the jobManager address. final File addrFile = new File(confDirPath + CliFrontend.JOBMANAGER_ADDRESS_FILE); Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { try { LOG.info("Killing the Flink-YARN application."); yarnClient.killApplication(appId); LOG.info("Deleting files in " + paths[2]); FileSystem shutFS = FileSystem.get(conf); shutFS.delete(paths[2], true); // delete conf and jar file. 
shutFS.close(); } catch (Exception e) { LOG.warn("Exception while killing the YARN application", e); } try { addrFile.delete(); } catch (Exception e) { LOG.warn("Exception while deleting the jobmanager address file", e); } LOG.info("YARN Client is shutting down"); yarnClient.stop(); } }); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); boolean told = false; char[] el = { '/', '|', '\\', '-' }; int i = 0; while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { if (!told && appState == YarnApplicationState.RUNNING) { System.err.println("Flink JobManager is now running on " + appReport.getHost() + ":" + jmPort); System.err.println("JobManager Web Interface: " + appReport.getTrackingUrl()); // write jobmanager connect information PrintWriter out = new PrintWriter(addrFile); out.println(appReport.getHost() + ":" + jmPort); out.close(); addrFile.setReadable(true, false); // readable for all. told = true; } if (!told) { System.err.print(el[i++] + "\r"); if (i == el.length) { i = 0; } Thread.sleep(500); // wait for the application to switch to RUNNING } else { Thread.sleep(5000); } appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } LOG.info("Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); if (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) { LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics()); } }
From source file:org.apache.flink.yarn.FlinkYarnClient.java
License:Apache License
/** * This method will block until the ApplicationMaster/JobManager have been * deployed on YARN./* ww w . jav a 2 s.c o m*/ */ protected AbstractFlinkYarnCluster deployInternal() throws Exception { isReadyForDepoyment(); LOG.info("Using values:"); LOG.info("\tTaskManager count = {}", taskManagerCount); LOG.info("\tJobManager memory = {}", jobManagerMemoryMb); LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb); // Create application via yarnClient yarnApplication = yarnClient.createApplication(); GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse(); // ------------------ Add dynamic properties to local flinkConfiguraton ------ List<Tuple2<String, String>> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded); for (Tuple2<String, String> dynProperty : dynProperties) { flinkConfiguration.setString(dynProperty.f0, dynProperty.f1); } // ------------------ Check if the specified queue exists -------------- try { List<QueueInfo> queues = yarnClient.getAllQueues(); if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session. boolean queueFound = false; for (QueueInfo queue : queues) { if (queue.getQueueName().equals(this.yarnQueue)) { queueFound = true; break; } } if (!queueFound) { String queueNames = ""; for (QueueInfo queue : queues) { queueNames += queue.getQueueName() + ", "; } LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: " + queueNames); } } else { LOG.debug("The YARN cluster does not have any queues configured"); } } catch (Throwable e) { LOG.warn("Error while getting queue information from YARN: " + e.getMessage()); if (LOG.isDebugEnabled()) { LOG.debug("Error details", e); } } // ------------------ Check if the YARN Cluster has the requested resources -------------- // the yarnMinAllocationMB specifies the smallest possible container allocation size. 
// all allocations below this value are automatically set to this value. final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0); if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) { LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. " + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB + "'. Please increase the memory size." + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances " + "you requested will start."); } // set the memory to minAllocationMB to do the next checks correctly if (jobManagerMemoryMb < yarnMinAllocationMB) { jobManagerMemoryMb = yarnMinAllocationMB; } if (taskManagerMemoryMb < yarnMinAllocationMB) { taskManagerMemoryMb = yarnMinAllocationMB; } Resource maxRes = appResponse.getMaximumResourceCapability(); final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n"; if (jobManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(); throw new YarnDeploymentException( "The cluster does not have the requested resources for the JobManager available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. " + NOTE); } if (taskManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(); throw new YarnDeploymentException( "The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb + "MB. " + NOTE); } final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are " + "connecting from the beginning because the resources are currently not available in the cluster. 
" + "The allocation might take more time than usual because the Flink YARN client needs to wait until " + "the resources become available."; int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC); } if (taskManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } if (jobManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn( "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } // ----------------- check if the requested containers fit into the cluster. int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length); // first, allocate the jobManager somewhere. if (!allocateResource(nmFree, jobManagerMemoryMb)) { LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. " + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC); } // allocate TaskManagers for (int i = 0; i < taskManagerCount; i++) { if (!allocateResource(nmFree, taskManagerMemoryMb)) { LOG.warn("There is not enough memory available in the YARN cluster. " + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. 
" + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n" + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC); } } // ------------------ Prepare Application Master Container ------------------------------ // respect custom JVM options in the YAML file final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME; boolean hasLogback = new File(logbackFile).exists(); String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME; boolean hasLog4j = new File(log4jFile).exists(); if (hasLogback) { shipFiles.add(new File(logbackFile)); } if (hasLog4j) { shipFiles.add(new File(log4jFile)); } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts; if (hasLogback || hasLog4j) { amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-main.log\""; } if (hasLogback) { amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME; } if (hasLog4j) { amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME; } amCommand += " " + ApplicationMaster.class.getName() + " " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log"; amContainer.setCommands(Collections.singletonList(amCommand)); LOG.debug("Application Master start command: " + amCommand); // intialize HDFS // Copy the application 
master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values." + "The Flink YARN client needs to store its files in a distributed file system"); } // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); final ApplicationId appId = appContext.getApplicationId(); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); // setup security tokens (code from apache storm) final Path[] paths = new Path[2 + shipFiles.size()]; StringBuilder envShipFileList = new StringBuilder(); // upload ship files for (int i = 0; i < shipFiles.size(); i++) { File shipFile = shipFiles.get(i); LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); paths[2 + i] = Utils.setupLocalResource(conf, 
fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); localResources.put(shipFile.getName(), shipResources); envShipFileList.append(paths[2 + i]); if (i + 1 < shipFiles.size()) { envShipFileList.append(','); } } paths[0] = remotePathJar; paths[1] = remotePathConf; sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. Utils.setTokensFor(amContainer, paths, conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); Utils.setupEnv(conf, appMasterEnv); // set configuration values appMasterEnv.put(FlinkYarnClient.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(FlinkYarnClient.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(FlinkYarnClient.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(FlinkYarnClient.ENV_APP_ID, appId.toString()); appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); appMasterEnv.put(FlinkYarnClient.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(FlinkYarnClient.ENV_DETACHED, String.valueOf(detached)); appMasterEnv.put(FlinkYarnClient.ENV_STREAMING_MODE, String.valueOf(streamingMode)); if (dynamicPropertiesEncoded != null) { appMasterEnv.put(FlinkYarnClient.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String 
name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); // application name appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { appContext.setQueue(yarnQueue); } LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); int waittime = 0; loop: while (true) { ApplicationReport report = yarnClient.getApplicationReport(appId); YarnApplicationState appState = report.getYarnApplicationState(); switch (appState) { case FAILED: case FINISHED: case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this command to further invesitage the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: LOG.info("Deploying cluster, current state " + appState); if (waittime > 60000) { LOG.info( "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster"); } } waittime += 1000; Thread.sleep(1000); } // the Flink cluster is deployed in YARN. Represent cluster return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached); }
From source file:org.apache.flink.yarn.FlinkYarnClientBase.java
License:Apache License
/** * This method will block until the ApplicationMaster/JobManager have been * deployed on YARN./*from w ww . ja va 2 s . c o m*/ */ protected AbstractFlinkYarnCluster deployInternal() throws Exception { isReadyForDeployment(); LOG.info("Using values:"); LOG.info("\tTaskManager count = {}", taskManagerCount); LOG.info("\tJobManager memory = {}", jobManagerMemoryMb); LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb); // Create application via yarnClient yarnApplication = yarnClient.createApplication(); GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse(); // ------------------ Add dynamic properties to local flinkConfiguraton ------ Map<String, String> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded); for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) { flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue()); } try { org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration); } catch (IOException e) { throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e); } // ------------------ Check if the specified queue exists -------------- try { List<QueueInfo> queues = yarnClient.getAllQueues(); if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session. boolean queueFound = false; for (QueueInfo queue : queues) { if (queue.getQueueName().equals(this.yarnQueue)) { queueFound = true; break; } } if (!queueFound) { String queueNames = ""; for (QueueInfo queue : queues) { queueNames += queue.getQueueName() + ", "; } LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. 
" + "Available queues: " + queueNames); } } else { LOG.debug("The YARN cluster does not have any queues configured"); } } catch (Throwable e) { LOG.warn("Error while getting queue information from YARN: " + e.getMessage()); if (LOG.isDebugEnabled()) { LOG.debug("Error details", e); } } // ------------------ Check if the YARN Cluster has the requested resources -------------- // the yarnMinAllocationMB specifies the smallest possible container allocation size. // all allocations below this value are automatically set to this value. final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0); if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) { LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. " + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB + "'. Please increase the memory size." + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances " + "you requested will start."); } // set the memory to minAllocationMB to do the next checks correctly if (jobManagerMemoryMb < yarnMinAllocationMB) { jobManagerMemoryMb = yarnMinAllocationMB; } if (taskManagerMemoryMb < yarnMinAllocationMB) { taskManagerMemoryMb = yarnMinAllocationMB; } Resource maxRes = appResponse.getMaximumResourceCapability(); final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n"; if (jobManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(); throw new YarnDeploymentException( "The cluster does not have the requested resources for the JobManager available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. 
" + NOTE); } if (taskManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(); throw new YarnDeploymentException( "The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb + "MB. " + NOTE); } final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are " + "connecting from the beginning because the resources are currently not available in the cluster. " + "The allocation might take more time than usual because the Flink YARN client needs to wait until " + "the resources become available."; int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC); } if (taskManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } if (jobManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn( "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } // ----------------- check if the requested containers fit into the cluster. int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length); // first, allocate the jobManager somewhere. if (!allocateResource(nmFree, jobManagerMemoryMb)) { LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. 
" + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC); } // allocate TaskManagers for (int i = 0; i < taskManagerCount; i++) { if (!allocateResource(nmFree, taskManagerMemoryMb)) { LOG.warn("There is not enough memory available in the YARN cluster. " + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. " + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n" + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC); } } // ------------------ Prepare Application Master Container ------------------------------ // respect custom JVM options in the YAML file final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME; boolean hasLogback = new File(logbackFile).exists(); String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME; boolean hasLog4j = new File(log4jFile).exists(); if (hasLogback) { shipFiles.add(new File(logbackFile)); } if (hasLog4j) { shipFiles.add(new File(log4jFile)); } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts; if (hasLogback || hasLog4j) { amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\""; if (hasLogback) { amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME; } if (hasLog4j) { amCommand += " -Dlog4j.configuration=file:" + 
FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME; } } amCommand += " " + getApplicationMasterClass().getName() + " " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err"; amContainer.setCommands(Collections.singletonList(amCommand)); LOG.debug("Application Master start command: " + amCommand); // intialize HDFS // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values." + "The Flink YARN client needs to store its files in a distributed file system"); } // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) { // activate re-execution of failed applications appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); activateHighAvailabilitySupport(appContext); } else { // set number of application retries to 1 in the default case appContext .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); } final ApplicationId appId = appContext.getApplicationId(); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), 
flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); Map<String, LocalResource> localResources = new HashMap<>(2); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); // setup security tokens (code from apache storm) final Path[] paths = new Path[2 + shipFiles.size()]; StringBuilder envShipFileList = new StringBuilder(); // upload ship files for (int i = 0; i < shipFiles.size(); i++) { File shipFile = shipFiles.get(i); LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); paths[2 + i] = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); localResources.put(shipFile.getName(), shipResources); envShipFileList.append(paths[2 + i]); if (i + 1 < shipFiles.size()) { envShipFileList.append(','); } } paths[0] = remotePathJar; paths[1] = remotePathConf; sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. 
Utils.setTokensFor(amContainer, paths, conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<>(); // set user specified app master environment variables appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration)); // set classpath from YARN configuration Utils.setupEnv(conf, appMasterEnv); // set Flink on YARN internal configuration values appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached)); if (dynamicPropertiesEncoded != null) { appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); // application name appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { 
appContext.setQueue(yarnQueue); } // add a hook to clean up in case deployment fails Runtime.getRuntime().addShutdownHook(deploymentFailureHook); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); int waittime = 0; loop: while (true) { ApplicationReport report; try { report = yarnClient.getApplicationReport(appId); } catch (IOException e) { throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage()); } YarnApplicationState appState = report.getYarnApplicationState(); switch (appState) { case FAILED: case FINISHED: case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: LOG.info("Deploying cluster, current state " + appState); if (waittime > 60000) { LOG.info( "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster"); } } waittime += 1000; Thread.sleep(1000); } // print the application id for user to cancel themselves. if (isDetached()) { LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed."); } // since deployment was successful, remove the hook try { Runtime.getRuntime().removeShutdownHook(deploymentFailureHook); } catch (IllegalStateException e) { // we're already in the shut down hook. } // the Flink cluster is deployed in YARN. 
Represent cluster return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached); }
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration);/*from www . j a v a 2 s . co m*/ yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, "--input", tmpInFile.getAbsoluteFile().toString(), "--output", tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. 
But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort(apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } //String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. 
Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }
From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java
License:Apache License
/** * Create and run the cluster./* w w w .j a v a 2 s. c o m*/ * @return exit code * @throws Throwable on a failure */ private int createAndRunCluster(String clustername) throws Throwable { HoyaVersionInfo.loadAndPrintVersionInfo(log); //load the cluster description from the cd argument String hoyaClusterDir = serviceArgs.getHoyaClusterURI(); URI hoyaClusterURI = new URI(hoyaClusterDir); Path clusterDirPath = new Path(hoyaClusterURI); HoyaFileSystem fs = getClusterFS(); // build up information about the running application -this // will be passed down to the cluster status MapOperations appInformation = new MapOperations(); AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath); log.info("Deploying cluster {}:", instanceDefinition); //REVISIT: why is this done? appState.updateInstanceDefinition(instanceDefinition); File confDir = getLocalConfDir(); if (!confDir.exists() || !confDir.isDirectory()) { log.error("Bad conf dir {}", confDir); File parentFile = confDir.getParentFile(); log.error("Parent dir {}:\n{}", parentFile, HoyaUtils.listDir(parentFile)); throw new BadCommandArgumentsException("Configuration directory %s doesn't exist", confDir); } Configuration serviceConf = getConfig(); // Try to get the proper filtering of static resources through the yarn proxy working serviceConf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer"); conf = new YarnConfiguration(serviceConf); //get our provider MapOperations globalOptions = instanceDefinition.getInternalOperations().getGlobalOptions(); String providerType = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_PROVIDER_NAME); log.info("Cluster provider type is {}", providerType); HoyaProviderFactory factory = HoyaProviderFactory.createHoyaProviderFactory(providerType); providerService = factory.createServerProvider(); // init the provider BUT DO NOT START IT YET providerService.init(getConfig()); 
addService(providerService); InetSocketAddress address = HoyaUtils.getRmSchedulerAddress(conf); log.info("RM is at {}", address); yarnRPC = YarnRPC.create(conf); /* * Extract the container ID. This is then * turned into an (incompete) container */ appMasterContainerID = ConverterUtils.toContainerId( HoyaUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name())); appAttemptID = appMasterContainerID.getApplicationAttemptId(); ApplicationId appid = appAttemptID.getApplicationId(); log.info("Hoya AM for ID {}", appid.getId()); appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString()); appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString()); appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString()); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); Credentials credentials = currentUser.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); dob.close(); // Now remove the AM->RM token so that containers cannot access it. Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); log.info("Token {}", token.getKind()); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { iter.remove(); } } allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); // set up secret manager secretManager = new ClientToAMTokenSecretManager(appAttemptID, null); // if not a secure cluster, extract the username -it will be // propagated to workers if (!UserGroupInformation.isSecurityEnabled()) { hoyaUsername = System.getenv(HADOOP_USER_NAME); log.info(HADOOP_USER_NAME + "='{}'", hoyaUsername); } Map<String, String> envVars; /** * It is critical this section is synchronized, to stop async AM events * arriving while registering a restarting AM. 
*/ synchronized (appState) { int heartbeatInterval = HEARTBEAT_INTERVAL; //add the RM client -this brings the callbacks in asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this); addService(asyncRMClient); //wrap it for the app state model rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient); //now bring it up runChildService(asyncRMClient); //nmclient relays callbacks back to this class nmClientAsync = new NMClientAsyncImpl("nmclient", this); runChildService(nmClientAsync); //bring up the Hoya RPC service startHoyaRPCServer(); InetSocketAddress rpcServiceAddr = rpcService.getConnectAddress(); appMasterHostname = rpcServiceAddr.getHostName(); appMasterRpcPort = rpcServiceAddr.getPort(); appMasterTrackingUrl = null; log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort); appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname); appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort); //build the role map List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles()); providerRoles.addAll(HoyaAMClientProvider.ROLES); // Start up the WebApp and track the URL for it webApp = new HoyaAMWebApp(); WebApps.$for("hoyaam", WebAppApi.class, new WebAppApiImpl(this, appState, providerService), "ws") .with(serviceConf).start(webApp); appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port(); WebAppService<HoyaAMWebApp> webAppService = new WebAppService<HoyaAMWebApp>("hoya", webApp); webAppService.init(conf); webAppService.start(); addService(webAppService); appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/"); appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port()); // Register self with ResourceManager // This will start heartbeating to the RM // address = HoyaUtils.getRmSchedulerAddress(asyncRMClient.getConfig()); log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl); 
RegisterApplicationMasterResponse response = asyncRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); Resource maxResources = response.getMaximumResourceCapability(); containerMaxMemory = maxResources.getMemory(); containerMaxCores = maxResources.getVirtualCores(); appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores()); // set the RM-defined maximum cluster values appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores)); appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory)); boolean securityEnabled = UserGroupInformation.isSecurityEnabled(); if (securityEnabled) { secretManager.setMasterKey(response.getClientToAMTokenMasterKey().array()); applicationACLs = response.getApplicationACLs(); //tell the server what the ACLs are rpcService.getServer().refreshServiceAcl(conf, new HoyaAMPolicyProvider()); } // extract container list List<Container> liveContainers = AMRestartSupport.retrieveContainersFromPreviousAttempt(response); String amRestartSupported = Boolean.toString(liveContainers != null); appInformation.put(StatusKeys.INFO_AM_RESTART_SUPPORTED, amRestartSupported); //now validate the installation Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir); providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled); //determine the location for the role history data Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME); //build the instance appState.buildInstance(instanceDefinition, providerConf, providerRoles, fs.getFileSystem(), historyDir, liveContainers, appInformation); // add the AM to the list of nodes in the cluster appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(), appMasterHostname + ":" + webApp.port()); // build up environment variables that the AM wants set in every container // irrespective of provider and role. 
envVars = new HashMap<String, String>(); if (hoyaUsername != null) { envVars.put(HADOOP_USER_NAME, hoyaUsername); } } String rolesTmpSubdir = appMasterContainerID.toString() + "/roles"; String amTmpDir = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR); Path tmpDirPath = new Path(amTmpDir); Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir); fs.getFileSystem().mkdirs(launcherTmpDirPath); //launcher service launchService = new RoleLaunchService(this, providerService, fs, new Path(getGeneratedConfDir()), envVars, launcherTmpDirPath); runChildService(launchService); appState.noteAMLaunched(); //Give the provider restricted access to the state providerService.bind(appState); // launch the provider; this is expected to trigger a callback that // brings up the service launchProviderService(instanceDefinition, confDir); try { //now block waiting to be told to exit the process waitForAMCompletionSignal(); //shutdown time } finally { finish(); } return amExitCode; }
From source file:org.apache.hoya.yarn.client.HoyaClient.java
License:Apache License
/** * * @param clustername/*from ww w . java 2 s. co m*/ * @param clusterDirectory * @param instanceDefinition * @param debugAM * @return the launched application * @throws YarnException * @throws IOException */ public LaunchedApplication launchApplication(String clustername, Path clusterDirectory, AggregateConf instanceDefinition, boolean debugAM) throws YarnException, IOException { deployedClusterName = clustername; HoyaUtils.validateClusterName(clustername); verifyNoLiveClusters(clustername); Configuration config = getConfig(); boolean clusterSecure = HoyaUtils.isClusterSecure(config); //create the Hoya AM provider -this helps set up the AM HoyaAMClientProvider hoyaAM = new HoyaAMClientProvider(config); instanceDefinition.resolve(); launchedInstanceDefinition = instanceDefinition; ConfTreeOperations internalOperations = instanceDefinition.getInternalOperations(); MapOperations internalOptions = internalOperations.getGlobalOptions(); ConfTreeOperations resourceOperations = instanceDefinition.getResourceOperations(); ConfTreeOperations appOperations = instanceDefinition.getAppConfOperations(); Path generatedConfDirPath = createPathThatMustExist( internalOptions.getMandatoryOption(OptionKeys.INTERNAL_GENERATED_CONF_PATH)); Path snapshotConfPath = createPathThatMustExist( internalOptions.getMandatoryOption(OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH)); // cluster Provider AbstractClientProvider provider = createClientProvider( internalOptions.getMandatoryOption(OptionKeys.INTERNAL_PROVIDER_NAME)); // make sure the conf dir is valid; // now build up the image path // TODO: consider supporting apps that don't have an image path Path imagePath = HoyaUtils.extractImagePath(hoyaFileSystem, internalOptions); if (log.isDebugEnabled()) { log.debug(instanceDefinition.toString()); } MapOperations hoyaAMResourceComponent = resourceOperations.getOrAddComponent(HoyaKeys.COMPONENT_AM); AppMasterLauncher amLauncher = new AppMasterLauncher(clustername, HoyaKeys.APP_TYPE, config, 
hoyaFileSystem, yarnClient, clusterSecure, hoyaAMResourceComponent); ApplicationId appId = amLauncher.getApplicationId(); // set the application name; amLauncher.setKeepContainersOverRestarts(true); amLauncher.setMaxAppAttempts(config.getInt(KEY_AM_RESTART_LIMIT, DEFAULT_AM_RESTART_LIMIT)); hoyaFileSystem.purgeHoyaAppInstanceTempFiles(clustername); Path tempPath = hoyaFileSystem.createHoyaAppInstanceTempPath(clustername, appId.toString() + "/am"); String libdir = "lib"; Path libPath = new Path(tempPath, libdir); hoyaFileSystem.getFileSystem().mkdirs(libPath); log.debug("FS={}, tempPath={}, libdir={}", hoyaFileSystem.toString(), tempPath, libPath); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = amLauncher.getLocalResources(); // conf directory setup Path remoteConfPath = null; String relativeConfDir = null; String confdirProp = System.getProperty(HoyaKeys.PROPERTY_CONF_DIR); if (confdirProp == null || confdirProp.isEmpty()) { log.debug("No local configuration directory provided as system property"); } else { File confDir = new File(confdirProp); if (!confDir.exists()) { throw new BadConfigException(HOYA_CONFIGURATION_DIRECTORY_NOT_FOUND, confDir); } Path localConfDirPath = HoyaUtils.createLocalPath(confDir); log.debug("Copying AM configuration data from {}", localConfDirPath); remoteConfPath = new Path(clusterDirectory, HoyaKeys.SUBMITTED_CONF_DIR); HoyaUtils.copyDirectory(config, localConfDirPath, remoteConfPath, null); } // the assumption here is that minimr cluster => this is a test run // and the classpath can look after itself if (!getUsingMiniMRCluster()) { log.debug("Destination is not a MiniYARNCluster -copying full classpath"); // insert conf dir first if (remoteConfPath != null) { relativeConfDir = HoyaKeys.SUBMITTED_CONF_DIR; Map<String, LocalResource> submittedConfDir = 
hoyaFileSystem.submitDirectory(remoteConfPath, relativeConfDir); HoyaUtils.mergeMaps(localResources, submittedConfDir); } log.debug("Copying JARs from local filesystem"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path HoyaUtils.putJar(localResources, hoyaFileSystem, this.getClass(), tempPath, libdir, SLIDER_JAR); } // build up the configuration // IMPORTANT: it is only after this call that site configurations // will be valid. propagatePrincipals(config, instanceDefinition); Configuration clientConfExtras = new Configuration(false); // then build up the generated path. FsPermission clusterPerms = getClusterDirectoryPermissions(config); HoyaUtils.copyDirectory(config, snapshotConfPath, generatedConfDirPath, clusterPerms); // add AM and provider specific artifacts to the resource map Map<String, LocalResource> providerResources; // standard AM resources hoyaAM.prepareAMAndConfigForLaunch(hoyaFileSystem, config, amLauncher, instanceDefinition, snapshotConfPath, generatedConfDirPath, clientConfExtras, libdir, tempPath); //add provider-specific resources provider.prepareAMAndConfigForLaunch(hoyaFileSystem, config, amLauncher, instanceDefinition, snapshotConfPath, generatedConfDirPath, clientConfExtras, libdir, tempPath); // now that the site config is fully generated, the provider gets // to do a quick review of them. 
log.debug("Preflight validation of cluster configuration"); hoyaAM.preflightValidateClusterConfiguration(hoyaFileSystem, clustername, config, instanceDefinition, clusterDirectory, generatedConfDirPath, clusterSecure); provider.preflightValidateClusterConfiguration(hoyaFileSystem, clustername, config, instanceDefinition, clusterDirectory, generatedConfDirPath, clusterSecure); // now add the image if it was set if (hoyaFileSystem.maybeAddImagePath(localResources, imagePath)) { log.debug("Registered image path {}", imagePath); } // build the environment amLauncher.putEnv(HoyaUtils.buildEnvMap(hoyaAMResourceComponent)); String classpath = HoyaUtils.buildClasspath(relativeConfDir, libdir, getConfig(), getUsingMiniMRCluster()); amLauncher.setEnv("CLASSPATH", classpath); if (log.isDebugEnabled()) { log.debug("AM classpath={}", classpath); log.debug("Environment Map:\n{}", HoyaUtils.stringifyMap(amLauncher.getEnv())); log.debug("Files in lib path\n{}", hoyaFileSystem.listFSDir(libPath)); } // rm address InetSocketAddress rmSchedulerAddress = null; try { rmSchedulerAddress = HoyaUtils.getRmSchedulerAddress(config); } catch (IllegalArgumentException e) { throw new BadConfigException("%s Address invalid: %s", YarnConfiguration.RM_SCHEDULER_ADDRESS, config.get(YarnConfiguration.RM_SCHEDULER_ADDRESS)); } String rmAddr = NetUtils.getHostPortString(rmSchedulerAddress); CommandLineBuilder commandLine = new CommandLineBuilder(); commandLine.addJavaBinary(); // insert any JVM options); hoyaAM.addJVMOptions(instanceDefinition, commandLine); // enable asserts if the text option is set commandLine.enableJavaAssertions(); // add the hoya AM sevice entry point commandLine.add(HoyaAMArgs.CLASSNAME); // create action and the cluster name commandLine.add(HoyaActions.ACTION_CREATE); commandLine.add(clustername); // debug if (debugAM) { commandLine.add(Arguments.ARG_DEBUG); } // set the cluster directory path commandLine.add(Arguments.ARG_HOYA_CLUSTER_URI); 
commandLine.add(clusterDirectory.toUri().toString()); if (!isUnset(rmAddr)) { commandLine.add(Arguments.ARG_RM_ADDR); commandLine.add(rmAddr); } if (serviceArgs.getFilesystemURL() != null) { commandLine.add(Arguments.ARG_FILESYSTEM); commandLine.add(serviceArgs.getFilesystemURL().toString()); } if (clusterSecure) { // if the cluster is secure, make sure that // the relevant security settings go over propagateConfOption(commandLine, config, HoyaXmlConfKeys.KEY_SECURITY_ENABLED); propagateConfOption(commandLine, config, DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY); } // write out the path output commandLine.addOutAndErrFiles(STDOUT_AM, STDERR_AM); String cmdStr = commandLine.build(); log.info("Completed setting up app master command {}", cmdStr); amLauncher.addCommandLine(commandLine); // the Hoya AM gets to configure the AM requirements, not the custom provider hoyaAM.prepareAMResourceRequirements(hoyaAMResourceComponent, amLauncher.getResource()); // Set the priority for the application master int amPriority = config.getInt(KEY_YARN_QUEUE_PRIORITY, DEFAULT_YARN_QUEUE_PRIORITY); amLauncher.setPriority(amPriority); // Set the queue to which this application is to be submitted in the RM // Queue for App master String amQueue = config.get(KEY_YARN_QUEUE, DEFAULT_HOYA_YARN_QUEUE); amLauncher.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure // submit the application LaunchedApplication launchedApplication = amLauncher.submitApplication(); return launchedApplication; }