List of usage examples for org.apache.hadoop.yarn.api.records YarnApplicationState NEW
YarnApplicationState NEW
To view the source code for org.apache.hadoop.yarn.api.records YarnApplicationState NEW.
Click Source Link
From source file:com.datatorrent.stram.client.StramClientUtils.java
License:Apache License
public static ApplicationReport getStartedAppInstanceByName(YarnClient clientRMService, String appName, String user, String excludeAppId) throws YarnException, IOException { List<ApplicationReport> applications = clientRMService .getApplications(Sets.newHashSet(StramClient.YARN_APPLICATION_TYPE), EnumSet.of(YarnApplicationState.RUNNING, YarnApplicationState.ACCEPTED, YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.SUBMITTED)); // see whether there is an app with the app name and user name running for (ApplicationReport app : applications) { if (!app.getApplicationId().toString().equals(excludeAppId) && app.getName().equals(appName) && app.getUser().equals(user)) { return app; }/*from w w w. j a v a 2 s .c o m*/ } return null; }
From source file:com.hazelcast.yarn.HazelcastYarnClient.java
License:Open Source License
private YarnApplicationState process(YarnClient yarnClient, ApplicationId appId, YarnApplicationState appState) throws InterruptedException, YarnException, IOException { ApplicationReport appReport;//from w ww .j av a2 s . c o m while (appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED) { TimeUnit.SECONDS.sleep(1L); appReport = yarnClient.getApplicationReport(appId); if ((appState != YarnApplicationState.ACCEPTED) && (appReport.getYarnApplicationState() == YarnApplicationState.ACCEPTED)) { LOG.log(Level.INFO, "Application {0} is ACCEPTED.", appId); } appState = appReport.getYarnApplicationState(); } return appState; }
From source file:io.hops.hopsworks.common.admin.llap.LlapClusterFacade.java
License:Open Source License
public boolean isClusterUp() { String llapAppID = variablesFacade.getVariableValue(Settings.VARIABLE_LLAP_APP_ID); if (llapAppID == null || llapAppID.isEmpty()) { return false; }/*from w w w . j a v a 2 s . co m*/ ApplicationId appId = ApplicationId.fromString(llapAppID); YarnClient yarnClient = yarnClientService.getYarnClientSuper(settings.getConfiguration()).getYarnClient(); ApplicationReport applicationReport = null; try { applicationReport = yarnClient.getApplicationReport(appId); } catch (IOException | YarnException e) { logger.log(Level.SEVERE, "Could not retrieve application state for llap cluster with appId: " + appId.toString(), e); return false; } finally { try { yarnClient.close(); } catch (IOException ex) { } } YarnApplicationState appState = applicationReport.getYarnApplicationState(); return appState == YarnApplicationState.RUNNING || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED || appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING; }
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/** * This method will block until the ApplicationMaster/JobManager have been * deployed on YARN.//from w ww . j ava 2 s .c o m */ protected YarnClusterClient deployInternal() throws Exception { isReadyForDeployment(); LOG.info("Using values:"); LOG.info("\tTaskManager count = {}", taskManagerCount); LOG.info("\tJobManager memory = {}", jobManagerMemoryMb); LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb); final YarnClient yarnClient = getYarnClient(); // ------------------ Check if the specified queue exists -------------------- try { List<QueueInfo> queues = yarnClient.getAllQueues(); // check only if there are queues configured in yarn and for this session. if (queues.size() > 0 && this.yarnQueue != null) { boolean queueFound = false; for (QueueInfo queue : queues) { if (queue.getQueueName().equals(this.yarnQueue)) { queueFound = true; break; } } if (!queueFound) { String queueNames = ""; for (QueueInfo queue : queues) { queueNames += queue.getQueueName() + ", "; } LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: " + queueNames); } } else { LOG.debug("The YARN cluster does not have any queues configured"); } } catch (Throwable e) { LOG.warn("Error while getting queue information from YARN: " + e.getMessage()); if (LOG.isDebugEnabled()) { LOG.debug("Error details", e); } } // Create application via yarnClient final YarnClientApplication yarnApplication = yarnClient.createApplication(); GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse(); Map<String, String> jobSystemProperties = new HashMap<>(2); // Certificates are materialized locally so DFSClient can be set to null // LocalResources are not used by Flink, so set it null HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username, services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(), JobType.FLINK, null, null, jobSystemProperties, services.getSettings().getFlinkKafkaCertDir(), appResponse.getApplicationId().toString()); StringBuilder tmpBuilder = new StringBuilder(); for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) { String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue()); javaOptions.add(option); addHopsworksParam(option); tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@"); } dynamicPropertiesEncoded += tmpBuilder.toString(); // ------------------ Add dynamic properties to local flinkConfiguraton ------ Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded); for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) { flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue()); } // ------------------ Set default file system scheme ------------------------- try { org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration); } catch (IOException e) { throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e); } // initialize file system // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the // getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is " + "using the default Hadoop configuration values. The Flink YARN " + "client needs to store its files in a distributed file system"); } // ------ Check if the YARN ClusterClient has the requested resources --- // the yarnMinAllocationMB specifies the smallest possible container // allocation size. all allocations below this value are automatically // set to this value. final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0); if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) { LOG.warn("The JobManager or TaskManager memory is below the smallest possible " + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'" + " is " + yarnMinAllocationMB + "'. Please increase the memory size." + "YARN will allocate the smaller containers but the scheduler will" + " account for the minimum-allocation-mb, maybe not all instances " + "you requested will start."); } // set the memory to minAllocationMB to do the next checks correctly if (jobManagerMemoryMb < yarnMinAllocationMB) { jobManagerMemoryMb = yarnMinAllocationMB; } if (taskManagerMemoryMb < yarnMinAllocationMB) { taskManagerMemoryMb = yarnMinAllocationMB; } Resource maxRes = appResponse.getMaximumResourceCapability(); final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the " + "'yarn.nodemanager.resource.memory-mb' configuration values\n"; if (jobManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(yarnClient, yarnApplication); throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager" + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. " + NOTE); } if (taskManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(yarnClient, yarnApplication); throw new YarnDeploymentException( "The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb + "MB. " + NOTE); } final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, " + "but maybe not all TaskManagers are connecting from the beginning " + "because the resources are currently not available in the cluster. " + "The allocation might take more time than usual because the Flink " + "YARN client needs to wait until the resources become available."; int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC); } if (taskManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } if (jobManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn( "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } // ----------------- check if the requested containers fit into the cluster. int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length); // first, allocate the jobManager somewhere. if (!allocateResource(nmFree, jobManagerMemoryMb)) { LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. " + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC); } // allocate TaskManagers for (int i = 0; i < taskManagerCount; i++) { if (!allocateResource(nmFree, taskManagerMemoryMb)) { LOG.warn("There is not enough memory available in the YARN cluster. " + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. " + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n" + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC); } } Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size()); for (File file : shipFiles) { effectiveShipFiles.add(file.getAbsoluteFile()); } //check if there is a logback or log4j file File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME); final boolean hasLogback = logbackFile.exists(); if (hasLogback) { effectiveShipFiles.add(logbackFile); } File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME); final boolean hasLog4j = log4jFile.exists(); if (hasLog4j) { effectiveShipFiles.add(log4jFile); if (hasLogback) { // this means there is already a logback configuration file --> fail LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and " + "Logback configuration files. Please delete or rename one of them."); } } addLibFolderToShipFiles(effectiveShipFiles); final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j); // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); final ApplicationId appId = appContext.getApplicationId(); // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------ String zkNamespace = getZookeeperNamespace(); // no user specified cli argument for namespace? if (zkNamespace == null || zkNamespace.isEmpty()) { // namespace defined in config? else use applicationId as default. zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, String.valueOf(appId)); setZookeeperNamespace(zkNamespace); } flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace); if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) { // activate re-execution of failed applications appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); activateHighAvailabilitySupport(appContext); } else { // set number of application retries to 1 in the default case appContext .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); } // local resource map for Yarn final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size()); // list of remote paths (after upload) final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size()); // classpath assembler final StringBuilder classPathBuilder = new StringBuilder(); // ship list that enables reuse of resources for task manager containers StringBuilder envShipFileList = new StringBuilder(); // upload and register ship files for (File shipFile : effectiveShipFiles) { LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); paths.add(remotePath); localResources.put(shipFile.getName(), shipResources); classPathBuilder.append(shipFile.getName()); if (shipFile.isDirectory()) { // add directories to the classpath classPathBuilder.append(File.separator).append("*"); } classPathBuilder.append(File.pathSeparator); envShipFileList.append(remotePath).append(","); } //////////////////////////////////////////////////////////////////////////// /* * Add Hops LocalResources paths here * */ //Add it to localResources for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) { localResources.put(entry.getKey(), entry.getValue()); //Append name to classPathBuilder classPathBuilder.append(entry.getKey()); classPathBuilder.append(File.pathSeparator); } //////////////////////////////////////////////////////////////////////////// // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); paths.add(remotePathJar); classPathBuilder.append("flink.jar").append(File.pathSeparator); paths.add(remotePathConf); classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator); sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. // setup security tokens Utils.setTokensFor(amContainer, paths, conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH and environment variables for ApplicationMaster final Map<String, String> appMasterEnv = new HashMap<>(); // set user specified app master environment variables appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration)); // set Flink app class path appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString()); // set Flink on YARN internal configuration values appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached)); appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace()); if (dynamicPropertiesEncoded != null) { appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } // set classpath from YARN configuration Utils.setupYarnClassPath(conf, appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); // application name appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { appContext.setQueue(yarnQueue); } // add a hook to clean up in case deployment fails Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication); Runtime.getRuntime().addShutdownHook(deploymentFailureHook); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); final long startTime = System.currentTimeMillis(); ApplicationReport report; YarnApplicationState lastAppState = YarnApplicationState.NEW; loop: while (true) { try { report = yarnClient.getApplicationReport(appId); } catch (IOException e) { throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage()); } YarnApplicationState appState = report.getYarnApplicationState(); switch (appState) { case FAILED: case FINISHED: case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this " + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: if (appState != lastAppState) { LOG.info("Deploying cluster, current state " + appState); } if (System.currentTimeMillis() - startTime > 60000) { LOG.info("Deployment took more than 60 seconds. Please check if the " + "requested resources are available in the YARN cluster"); } } lastAppState = appState; Thread.sleep(250); } // print the application id for user to cancel themselves. if (isDetachedMode()) { LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed."); } // since deployment was successful, remove the hook try { Runtime.getRuntime().removeShutdownHook(deploymentFailureHook); } catch (IllegalStateException e) { // we're already in the shut down hook. } String host = report.getHost(); int port = report.getRpcPort(); // Correctly initialize the Flink config flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host); flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port); // the Flink cluster is deployed in YARN. Represent cluster return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true); }
From source file:org.apache.drill.yarn.core.YarnRMClient.java
License:Apache License
/** * Waits for the application to start. This version is somewhat informal, the * intended use is when debugging unmanaged applications. * * @throws YarnClientException/*from w w w . ja va 2s . co m*/ */ public ApplicationAttemptId waitForStart() throws YarnClientException { ApplicationReport appReport; YarnApplicationState appState; ApplicationAttemptId attemptId; for (;;) { appReport = getAppReport(); appState = appReport.getYarnApplicationState(); attemptId = appReport.getCurrentApplicationAttemptId(); if (appState != YarnApplicationState.NEW && appState != YarnApplicationState.NEW_SAVING && appState != YarnApplicationState.SUBMITTED) { break; } System.out.println("App State: " + appState); try { Thread.sleep(1000); } catch (InterruptedException e) { // Should never occur. } } if (appState != YarnApplicationState.ACCEPTED) { throw new YarnClientException("Application start failed with status " + appState); } return attemptId; }
From source file:org.apache.flink.yarn.AbstractYarnClusterDescriptor.java
License:Apache License
public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient, YarnClientApplication yarnApplication) throws Exception { // ------------------ Set default file system scheme ------------------------- try {/*from w ww .j a v a2 s . c o m*/ org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration); } catch (IOException e) { throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e); } // initialize file system // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values." + "The Flink YARN client needs to store its files in a distributed file system"); } ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size()); for (File file : shipFiles) { effectiveShipFiles.add(file.getAbsoluteFile()); } //check if there is a logback or log4j file File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME); final boolean hasLogback = logbackFile.exists(); if (hasLogback) { effectiveShipFiles.add(logbackFile); } File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME); final boolean hasLog4j = log4jFile.exists(); if (hasLog4j) { effectiveShipFiles.add(log4jFile); if (hasLogback) { // this means there is already a logback configuration file --> fail LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and " + "Logback configuration files. Please delete or rename one of them."); } } addLibFolderToShipFiles(effectiveShipFiles); // add the user jar to the classpath of the to-be-created cluster if (userJarFiles != null) { effectiveShipFiles.addAll(userJarFiles); } // Set-up ApplicationSubmissionContext for the application final ApplicationId appId = appContext.getApplicationId(); // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------ String zkNamespace = getZookeeperNamespace(); // no user specified cli argument for namespace? if (zkNamespace == null || zkNamespace.isEmpty()) { // namespace defined in config? else use applicationId as default. zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID, String.valueOf(appId)); setZookeeperNamespace(zkNamespace); } flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace); if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) { // activate re-execution of failed applications appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); activateHighAvailabilitySupport(appContext); } else { // set number of application retries to 1 in the default case appContext .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); } // local resource map for Yarn final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size()); // list of remote paths (after upload) final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size()); // classpath assembler final StringBuilder classPathBuilder = new StringBuilder(); // ship list that enables reuse of resources for task manager containers StringBuilder envShipFileList = new StringBuilder(); // upload and register ship files for (File shipFile : effectiveShipFiles) { LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); paths.add(remotePath); localResources.put(shipFile.getName(), shipResources); if (shipFile.isDirectory()) { // add directories to the classpath java.nio.file.Path shipPath = shipFile.toPath(); final java.nio.file.Path parentPath = shipPath.getParent(); Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() { @Override public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs) throws IOException { super.preVisitDirectory(dir, attrs); java.nio.file.Path relativePath = parentPath.relativize(dir); classPathBuilder.append(relativePath).append(File.separator).append("*") .append(File.pathSeparator); return FileVisitResult.CONTINUE; } }); } else { // add files to the classpath classPathBuilder.append(shipFile.getName()).append(File.pathSeparator); } envShipFileList.append(remotePath).append(","); } // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); paths.add(remotePathJar); classPathBuilder.append("flink.jar").append(File.pathSeparator); paths.add(remotePathConf); classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator); // write job graph to tmp file and add it to local resource // TODO: server use user main method to generate job graph if (jobGraph != null) { try { File fp = File.createTempFile(appId.toString(), null); fp.deleteOnExit(); try (FileOutputStream output = new FileOutputStream(fp); ObjectOutputStream obOutput = new ObjectOutputStream(output);) { obOutput.writeObject(jobGraph); } LocalResource jobgraph = Records.newRecord(LocalResource.class); Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph, fs.getHomeDirectory()); localResources.put("job.graph", jobgraph); paths.add(remoteJobGraph); classPathBuilder.append("job.graph").append(File.pathSeparator); } catch (Exception e) { LOG.warn("Add job graph to local resource fail"); throw e; } } sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. //To support Yarn Secure Integration Test Scenario //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML //and KRB5 configuration files. We are adding these files as container local resources for the container //applications (JM/TMs) to have proper secure cluster setup Path remoteKrb5Path = null; Path remoteYarnSiteXmlPath = null; boolean hasKrb5 = false; if (System.getenv("IN_TESTS") != null) { String krb5Config = System.getProperty("java.security.krb5.conf"); if (krb5Config != null && krb5Config.length() != 0) { File krb5 = new File(krb5Config); LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath()); LocalResource krb5ConfResource = Records.newRecord(LocalResource.class); Path krb5ConfPath = new Path(krb5.getAbsolutePath()); remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource, fs.getHomeDirectory()); localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource); File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME); LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath()); LocalResource yarnConfResource = Records.newRecord(LocalResource.class); Path yarnSitePath = new Path(f.getAbsolutePath()); remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath, yarnConfResource, fs.getHomeDirectory()); localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource); hasKrb5 = true; } } // setup security tokens LocalResource keytabResource = null; Path remotePathKeytab = null; String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB); if (keytab != null) { LOG.info("Adding keytab {} to the AM container local resource bucket", keytab); keytabResource = Records.newRecord(LocalResource.class); Path keytabPath = new Path(keytab); remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource, fs.getHomeDirectory()); localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource); } final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5); if (UserGroupInformation.isSecurityEnabled() && keytab == null) { //set tokens only when keytab is not provided LOG.info("Adding delegation token to the AM container.."); Utils.setTokensFor(amContainer, paths, conf); } amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH and environment variables for ApplicationMaster final Map<String, String> appMasterEnv = new HashMap<>(); // set user specified app master environment variables appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration)); // set Flink app class path appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString()); // set Flink on YARN internal configuration values appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached)); appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace()); // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName()); if (keytabResource != null) { appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString()); String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL); appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal); } //To support Yarn Secure Integration Test Scenario if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) { appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString()); appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString()); } if (dynamicPropertiesEncoded != null) { appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } // set classpath from YARN configuration Utils.setupYarnClassPath(conf, appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { appContext.setQueue(yarnQueue); } setApplicationTags(appContext); // add a hook to clean up in case deployment fails Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication); Runtime.getRuntime().addShutdownHook(deploymentFailureHook); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); final long startTime = System.currentTimeMillis(); ApplicationReport report; YarnApplicationState lastAppState = YarnApplicationState.NEW; loop: while (true) { try { report = yarnClient.getApplicationReport(appId); } catch (IOException e) { throw new YarnDeploymentException("Failed to deploy the cluster.", e); } YarnApplicationState appState = report.getYarnApplicationState(); LOG.debug("Application State: {}", appState); switch (appState) { case FAILED: case FINISHED: //TODO: the finished state may be valid in flip-6 case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: if (appState != lastAppState) { LOG.info("Deploying cluster, current state " + appState); } if (System.currentTimeMillis() - startTime > 60000) { LOG.info( "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster"); } } lastAppState = appState; Thread.sleep(250); } // print the application id for user to cancel themselves. if (isDetachedMode()) { LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed."); } // since deployment was successful, remove the hook try { Runtime.getRuntime().removeShutdownHook(deploymentFailureHook); } catch (IllegalStateException e) { // we're already in the shut down hook. } return report; }
From source file:org.apache.ignite.yarn.IgniteYarnClient.java
License:Apache License
/** * Main methods has one mandatory parameter and one optional parameter. * * @param args Path to jar mandatory parameter and property file is optional. *///from ww w . jav a2 s . co m public static void main(String[] args) throws Exception { checkArguments(args); // Set path to app master jar. String pathAppMasterJar = args[0]; ClusterProperties props = ClusterProperties.from(args.length == 2 ? args[1] : null); YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); FileSystem fs = FileSystem.get(conf); Path ignite; // Load ignite and jar if (props.ignitePath() == null) ignite = getIgnite(props, fs); else ignite = new Path(props.ignitePath()); // Upload the jar file to HDFS. Path appJar = IgniteYarnUtils.copyLocalToHdfs(fs, pathAppMasterJar, props.igniteWorkDir() + File.separator + IgniteYarnUtils.JAR_NAME); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands(Collections .singletonList(Environment.JAVA_HOME.$() + "/bin/java -Xmx512m " + ApplicationMaster.class.getName() + IgniteYarnUtils.SPACE + ignite.toUri() + IgniteYarnUtils.YARN_LOG_OUT)); // Setup jar for ApplicationMaster LocalResource appMasterJar = IgniteYarnUtils.setupFile(appJar, fs, LocalResourceType.FILE); amContainer.setLocalResources(Collections.singletonMap(IgniteYarnUtils.JAR_NAME, appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = props.toEnvs(); setupAppMasterEnv(appMasterEnv, conf); amContainer.setEnvironment(appMasterEnv); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials creds = new Credentials(); String tokRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokRenewer == null || tokRenewer.length() == 0) throw new IOException("Master Kerberos principal for the RM is not set."); log.info("Found RM principal: " + tokRenewer); final Token<?> tokens[] = fs.addDelegationTokens(tokRenewer, creds); if (tokens != null) log.info("File system delegation tokens: " + Arrays.toString(tokens)); amContainer.setTokens(IgniteYarnUtils.createTokenBuffer(creds)); } // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(512); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("ignition"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); yarnClient.submitApplication(appContext); log.log(Level.INFO, "Submitted application. Application id: {0}", appId); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED) { TimeUnit.SECONDS.sleep(1L); appReport = yarnClient.getApplicationReport(appId); if (appState != YarnApplicationState.ACCEPTED && appReport.getYarnApplicationState() == YarnApplicationState.ACCEPTED) log.log(Level.INFO, "Application {0} is ACCEPTED.", appId); appState = appReport.getYarnApplicationState(); } log.log(Level.INFO, "Application {0} is {1}.", new Object[] { appId, appState }); }
From source file:org.apache.slider.client.SliderClient.java
License:Apache License
/** * Implement the list action./* w ww. j av a2 s . c om*/ * @param clustername List out specific instance name * @param args Action list arguments * @return 0 if one or more entries were listed * @throws IOException * @throws YarnException * @throws UnknownApplicationInstanceException if a specific instance * was named but it was not found */ @Override @VisibleForTesting public int actionList(String clustername, ActionListArgs args) throws IOException, YarnException { verifyBindingsDefined(); boolean live = args.live; String state = args.state; boolean verbose = args.verbose; if (live && !state.isEmpty()) { throw new BadCommandArgumentsException( Arguments.ARG_LIVE + " and " + Arguments.ARG_STATE + " are exclusive"); } // flag to indicate only services in a specific state are to be listed boolean listOnlyInState = live || !state.isEmpty(); YarnApplicationState min, max; if (live) { min = YarnApplicationState.NEW; max = YarnApplicationState.RUNNING; } else if (!state.isEmpty()) { YarnApplicationState stateVal = extractYarnApplicationState(state); min = max = stateVal; } else { min = YarnApplicationState.NEW; max = YarnApplicationState.KILLED; } // get the complete list of persistent instances Map<String, Path> persistentInstances = sliderFileSystem.listPersistentInstances(); if (persistentInstances.isEmpty() && isUnset(clustername)) { // an empty listing is a success if no cluster was named log.debug("No application instances found"); return EXIT_SUCCESS; } // and those the RM knows about List<ApplicationReport> instances = listSliderInstances(null); SliderUtils.sortApplicationsByMostRecent(instances); Map<String, ApplicationReport> reportMap = SliderUtils.buildApplicationReportMap(instances, min, max); log.debug("Persisted {} deployed {} filtered[{}-{}] & de-duped to {}", persistentInstances.size(), instances.size(), min, max, reportMap.size()); if (isSet(clustername)) { // only one instance is expected // resolve the persistent value Path persistent = persistentInstances.get(clustername); if (persistent == null) { throw unknownClusterException(clustername); } // create a new map with only that instance in it. // this restricts the output of results to this instance persistentInstances = new HashMap<String, Path>(); persistentInstances.put(clustername, persistent); } // at this point there is either the entire list or a stripped down instance int listed = 0; for (String name : persistentInstances.keySet()) { ApplicationReport report = reportMap.get(name); if (!listOnlyInState || report != null) { // list the details if all were requested, or the filtering contained // a report listed++; String details = instanceDetailsToString(name, report, verbose); print(details); } } return listed > 0 ? EXIT_SUCCESS : EXIT_FALSE; }
From source file:org.apache.tez.client.LocalClient.java
License:Apache License
protected YarnApplicationState convertDAGAppMasterState(DAGAppMasterState dagAppMasterState) { switch (dagAppMasterState) { case NEW://from w w w. j a v a 2 s .c o m return YarnApplicationState.NEW; case INITED: case RECOVERING: case IDLE: case RUNNING: return YarnApplicationState.RUNNING; case SUCCEEDED: return YarnApplicationState.FINISHED; case FAILED: return YarnApplicationState.FAILED; case KILLED: return YarnApplicationState.KILLED; case ERROR: return YarnApplicationState.FAILED; default: return YarnApplicationState.SUBMITTED; } }
From source file:org.apache.tez.client.TestTezClient.java
License:Apache License
@Test(timeout = 5000) public void testWaitTillReady_Interrupt() throws Exception { final TezClientForTest client = configure(); client.start();/*from w w w . ja va 2 s . c o m*/ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) .thenReturn(YarnApplicationState.NEW); final AtomicReference<Exception> exceptionReference = new AtomicReference<Exception>(); Thread thread = new Thread() { @Override public void run() { try { client.waitTillReady(); } catch (Exception e) { exceptionReference.set(e); } }; }; thread.start(); thread.join(250); thread.interrupt(); thread.join(); Assert.assertThat(exceptionReference.get(), CoreMatchers.instanceOf(InterruptedException.class)); client.stop(); }