List of usage examples for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState
@Public @Stable public abstract YarnApplicationState getYarnApplicationState();
YarnApplicationState
of the application. From source file:gobblin.yarn.GobblinYarnAppLauncher.java
License:Apache License
@Subscribe public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) { ApplicationReport applicationReport = applicationReportArrivalEvent.getApplicationReport(); YarnApplicationState appState = applicationReport.getYarnApplicationState(); LOGGER.info("Gobblin Yarn application state: " + appState.toString()); // Reset the count on failures to get the ApplicationReport when there's one success this.getApplicationReportFailureCount.set(0); if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) { applicationCompleted = true;/*from ww w . ja v a 2 s .c o m*/ LOGGER.info("Gobblin Yarn application finished with final status: " + applicationReport.getFinalApplicationStatus().toString()); if (applicationReport.getFinalApplicationStatus() == FinalApplicationStatus.FAILED) { LOGGER.error("Gobblin Yarn application failed for the following reason: " + applicationReport.getDiagnostics()); } try { GobblinYarnAppLauncher.this.stop(); } catch (IOException ioe) { LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe); } catch (TimeoutException te) { LOGGER.error("Timeout in stopping the service manager", te); } finally { if (this.emailNotificationOnShutdown) { sendEmailOnShutdown(Optional.of(applicationReport)); } } } }
From source file:husky.client.HuskyYarnClient.java
License:Apache License
private boolean monitorApp() throws YarnException, IOException { while (true) { try {// ww w .j av a2 s . c o m Thread.sleep(1000); } catch (InterruptedException ignore) { } ApplicationReport report = mYarnClient.getApplicationReport(mAppId); YarnApplicationState yarnState = report.getYarnApplicationState(); FinalApplicationStatus appState = report.getFinalApplicationStatus(); LOG.info("YarnState = " + yarnState + ", AppState = " + appState + ", Progress = " + report.getProgress()); if (yarnState == FINISHED) { if (appState == SUCCEEDED) { LOG.info("Application completed successfully."); return true; } else { LOG.info("Application completed unsuccessfully. YarnState: " + yarnState.toString() + ", ApplicationState: " + appState.toString()); return false; } } else if (yarnState == KILLED || yarnState == FAILED) { LOG.info("Application did not complete. YarnState: " + yarnState.toString() + ", ApplicationState: " + appState.toString()); return false; } } }
From source file:hws.core.JobClient.java
License:Apache License
public void run(String[] args) throws Exception { //final String command = args[0]; //final int n = Integer.valueOf(args[1]); //final Path jarPath = new Path(args[2]); Options options = new Options(); /*options.addOption(OptionBuilder.withLongOpt("jar") .withDescription( "Jar path" ) .hasArg()/*from w ww . ja va 2 s . c om*/ .withArgName("JarPath") .create()); options.addOption(OptionBuilder.withLongOpt("scheduler") .withDescription( "Scheduler class name" ) .hasArg() .withArgName("ClassName") .create()); */options.addOption(OptionBuilder.withLongOpt("zk-servers") .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks")); //options.addOption("l", "list", false, "list modules"); options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs() .withArgName("XMLFiles").create()); /*options.addOption(OptionBuilder.withLongOpt( "remove" ) .withDescription( "remove modules" ) .hasArgs() .withArgName("ModuleNames") .create("rm")); */CommandLineParser parser = new BasicParser(); CommandLine cmd = parser.parse(options, args); //Path jarPath = null; //String schedulerClassName = null; String[] xmlFileNames = null; //String []moduleNames = null; String zksArgs = ""; String[] zkServers = null; if (cmd.hasOption("zks")) { zksArgs = "-zks"; zkServers = cmd.getOptionValues("zks"); for (String zks : zkServers) { zksArgs += " " + zks; } } //Logger setup //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log")); //Logger.addOutputStream(writer); /*if(cmd.hasOption("l")){ LOG.warn("Argument --list (-l) is not supported yet."); } if(cmd.hasOption("jar")){ jarPath = new Path(cmd.getOptionValue("jar")); } if(cmd.hasOption("scheduler")){ schedulerClassName = cmd.getOptionValue("scheduler"); }*/ if (cmd.hasOption("load")) { xmlFileNames = cmd.getOptionValues("load"); } /*else if(cmd.hasOption("rm")){ moduleNames = cmd.getOptionValues("rm"); }*/ //LOG.info("Jar-Path "+jarPath); if (xmlFileNames != null) { String paths = ""; for (String path : xmlFileNames) { paths += path + "; "; } LOG.info("Load XMLs: " + paths); } /*if(moduleNames!=null){ String modules = ""; for(String module: moduleNames){ modules += module+"; "; } LOG.info("remove: "+modules); }*/ // Create yarnClient YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server if (!zk.exists("/hadoop-watershed")) { zk.createPersistent("/hadoop-watershed", ""); } zk.createPersistent("/hadoop-watershed/" + appId.toString(), ""); FileSystem fs = FileSystem.get(conf); LOG.info("Collecting files to upload"); fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString())); fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs")); ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames); LOG.info("Uploading files to HDFS"); for (String path : modulePipeline.files()) { uploadFile(fs, new File(path), appId); } LOG.info("Upload finished"); String modulePipelineJson = Json.dumps(modulePipeline); String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson)) .replaceAll("\\s", ""); LOG.info("ModulePipeline: " + modulePipelineJson); //LOG.info("ModulePipeline: "+modulePipelineBase64); amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M" + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " " + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Setup jar for ApplicationMaster //LocalResource appMasterJar = Records.newRecord(LocalResource.class); //setupAppMasterJar(jarPath, appMasterJar); //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar)); LOG.info("Listing files for YARN-Watershed"); RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false); Map<String, LocalResource> resources = new HashMap<String, LocalResource>(); LOG.info("Files setup as resource"); while (filesIterator.hasNext()) { LocatedFileStatus fileStatus = filesIterator.next(); // Setup jar for ApplicationMaster LocalResource containerJar = Records.newRecord(LocalResource.class); ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar); resources.put(fileStatus.getPath().getName(), containerJar); } LOG.info("container resource setup"); amContainer.setLocalResources(resources); fs.close(); //closing FileSystem interface // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); ContainerUtils.setupContainerEnv(appMasterEnv, conf); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application //ApplicationSubmissionContext appContext = //app.getApplicationSubmissionContext(); appContext.setApplicationName("Hadoop-Watershed"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application LOG.info("Submitting application " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for containers to finish"); zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { Thread.sleep(100); appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } System.out.println("Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); System.out.println("deleting " + appId.toString() + " znode"); zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper }
From source file:io.hops.hopsworks.api.zeppelin.rest.InterpreterRestApi.java
License:Apache License
private List<JobAdministration.YarnApplicationReport> fetchJobs() { JobAdministration jobAdmin = new JobAdministration(); List<JobAdministration.YarnApplicationReport> reports = new ArrayList<>(); YarnClient client = YarnClient.createYarnClient(); Configuration conf = settings.getConfiguration(); client.init(conf);//from ww w. j av a2 s . c om client.start(); try { //Create our custom YarnApplicationReport Pojo for (ApplicationReport appReport : client.getApplications(PREDICATE)) { reports.add(jobAdmin.new YarnApplicationReport(appReport.getApplicationId().toString(), appReport.getName(), appReport.getUser(), appReport.getStartTime(), appReport.getFinishTime(), appReport.getApplicationId().getClusterTimestamp(), appReport.getApplicationId().getId(), appReport.getYarnApplicationState().name())); } } catch (YarnException | IOException ex) { logger.error("", ex); } return reports; }
From source file:io.hops.hopsworks.api.zeppelin.rest.InterpreterRestApi.java
License:Apache License
private List<JobAdministration.YarnApplicationReport> fetchJobs(String username) { JobAdministration jobAdmin = new JobAdministration(); List<JobAdministration.YarnApplicationReport> reports = new ArrayList<>(); YarnClient client = YarnClient.createYarnClient(); Configuration conf = settings.getConfiguration(); client.init(conf);/*from ww w . jav a 2 s .co m*/ client.start(); try { //Create our custom YarnApplicationReport Pojo for (ApplicationReport appReport : client.getApplications(PREDICATE)) { if (username.equals(appReport.getUser())) { reports.add(jobAdmin.new YarnApplicationReport(appReport.getApplicationId().toString(), appReport.getName(), appReport.getUser(), appReport.getStartTime(), appReport.getFinishTime(), appReport.getApplicationId().getClusterTimestamp(), appReport.getApplicationId().getId(), appReport.getYarnApplicationState().name())); } } } catch (YarnException | IOException ex) { logger.error("", ex); } return reports; }
From source file:io.hops.hopsworks.common.admin.llap.LlapClusterFacade.java
License:Open Source License
public boolean isClusterUp() { String llapAppID = variablesFacade.getVariableValue(Settings.VARIABLE_LLAP_APP_ID); if (llapAppID == null || llapAppID.isEmpty()) { return false; }// ww w. java 2s . c o m ApplicationId appId = ApplicationId.fromString(llapAppID); YarnClient yarnClient = yarnClientService.getYarnClientSuper(settings.getConfiguration()).getYarnClient(); ApplicationReport applicationReport = null; try { applicationReport = yarnClient.getApplicationReport(appId); } catch (IOException | YarnException e) { logger.log(Level.SEVERE, "Could not retrieve application state for llap cluster with appId: " + appId.toString(), e); return false; } finally { try { yarnClient.close(); } catch (IOException ex) { } } YarnApplicationState appState = applicationReport.getYarnApplicationState(); return appState == YarnApplicationState.RUNNING || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED || appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING; }
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/** * This method will block until the ApplicationMaster/JobManager have been * deployed on YARN.// w w w . ja v a2 s . co m */ protected YarnClusterClient deployInternal() throws Exception { isReadyForDeployment(); LOG.info("Using values:"); LOG.info("\tTaskManager count = {}", taskManagerCount); LOG.info("\tJobManager memory = {}", jobManagerMemoryMb); LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb); final YarnClient yarnClient = getYarnClient(); // ------------------ Check if the specified queue exists -------------------- try { List<QueueInfo> queues = yarnClient.getAllQueues(); // check only if there are queues configured in yarn and for this session. if (queues.size() > 0 && this.yarnQueue != null) { boolean queueFound = false; for (QueueInfo queue : queues) { if (queue.getQueueName().equals(this.yarnQueue)) { queueFound = true; break; } } if (!queueFound) { String queueNames = ""; for (QueueInfo queue : queues) { queueNames += queue.getQueueName() + ", "; } LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: " + queueNames); } } else { LOG.debug("The YARN cluster does not have any queues configured"); } } catch (Throwable e) { LOG.warn("Error while getting queue information from YARN: " + e.getMessage()); if (LOG.isDebugEnabled()) { LOG.debug("Error details", e); } } // Create application via yarnClient final YarnClientApplication yarnApplication = yarnClient.createApplication(); GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse(); Map<String, String> jobSystemProperties = new HashMap<>(2); // Certificates are materialized locally so DFSClient can be set to null // LocalResources are not used by Flink, so set it null HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username, services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(), JobType.FLINK, null, null, jobSystemProperties, services.getSettings().getFlinkKafkaCertDir(), appResponse.getApplicationId().toString()); StringBuilder tmpBuilder = new StringBuilder(); for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) { String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue()); javaOptions.add(option); addHopsworksParam(option); tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@"); } dynamicPropertiesEncoded += tmpBuilder.toString(); // ------------------ Add dynamic properties to local flinkConfiguraton ------ Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded); for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) { flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue()); } // ------------------ Set default file system scheme ------------------------- try { org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration); } catch (IOException e) { throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e); } // initialize file system // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); // hard coded check for the GoogleHDFS client because its not overriding the // getScheme() method. if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is " + "using the default Hadoop configuration values. The Flink YARN " + "client needs to store its files in a distributed file system"); } // ------ Check if the YARN ClusterClient has the requested resources --- // the yarnMinAllocationMB specifies the smallest possible container // allocation size. all allocations below this value are automatically // set to this value. final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0); if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) { LOG.warn("The JobManager or TaskManager memory is below the smallest possible " + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'" + " is " + yarnMinAllocationMB + "'. Please increase the memory size." + "YARN will allocate the smaller containers but the scheduler will" + " account for the minimum-allocation-mb, maybe not all instances " + "you requested will start."); } // set the memory to minAllocationMB to do the next checks correctly if (jobManagerMemoryMb < yarnMinAllocationMB) { jobManagerMemoryMb = yarnMinAllocationMB; } if (taskManagerMemoryMb < yarnMinAllocationMB) { taskManagerMemoryMb = yarnMinAllocationMB; } Resource maxRes = appResponse.getMaximumResourceCapability(); final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the " + "'yarn.nodemanager.resource.memory-mb' configuration values\n"; if (jobManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(yarnClient, yarnApplication); throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager" + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. " + NOTE); } if (taskManagerMemoryMb > maxRes.getMemory()) { failSessionDuringDeployment(yarnClient, yarnApplication); throw new YarnDeploymentException( "The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb + "MB. " + NOTE); } final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, " + "but maybe not all TaskManagers are connecting from the beginning " + "because the resources are currently not available in the cluster. " + "The allocation might take more time than usual because the Flink " + "YARN client needs to wait until the resources become available."; int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC); } if (taskManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } if (jobManagerMemoryMb > freeClusterMem.containerLimit) { LOG.warn( "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC); } // ----------------- check if the requested containers fit into the cluster. int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length); // first, allocate the jobManager somewhere. if (!allocateResource(nmFree, jobManagerMemoryMb)) { LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. " + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC); } // allocate TaskManagers for (int i = 0; i < taskManagerCount; i++) { if (!allocateResource(nmFree, taskManagerMemoryMb)) { LOG.warn("There is not enough memory available in the YARN cluster. " + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. " + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n" + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC); } } Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size()); for (File file : shipFiles) { effectiveShipFiles.add(file.getAbsoluteFile()); } //check if there is a logback or log4j file File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME); final boolean hasLogback = logbackFile.exists(); if (hasLogback) { effectiveShipFiles.add(logbackFile); } File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME); final boolean hasLog4j = log4jFile.exists(); if (hasLog4j) { effectiveShipFiles.add(log4jFile); if (hasLogback) { // this means there is already a logback configuration file --> fail LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and " + "Logback configuration files. Please delete or rename one of them."); } } addLibFolderToShipFiles(effectiveShipFiles); final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j); // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext(); final ApplicationId appId = appContext.getApplicationId(); // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------ String zkNamespace = getZookeeperNamespace(); // no user specified cli argument for namespace? if (zkNamespace == null || zkNamespace.isEmpty()) { // namespace defined in config? else use applicationId as default. zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, String.valueOf(appId)); setZookeeperNamespace(zkNamespace); } flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace); if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) { // activate re-execution of failed applications appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); activateHighAvailabilitySupport(appContext); } else { // set number of application retries to 1 in the default case appContext .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1)); } // local resource map for Yarn final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size()); // list of remote paths (after upload) final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size()); // classpath assembler final StringBuilder classPathBuilder = new StringBuilder(); // ship list that enables reuse of resources for task manager containers StringBuilder envShipFileList = new StringBuilder(); // upload and register ship files for (File shipFile : effectiveShipFiles) { LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); paths.add(remotePath); localResources.put(shipFile.getName(), shipResources); classPathBuilder.append(shipFile.getName()); if (shipFile.isDirectory()) { // add directories to the classpath classPathBuilder.append(File.separator).append("*"); } classPathBuilder.append(File.pathSeparator); envShipFileList.append(remotePath).append(","); } //////////////////////////////////////////////////////////////////////////// /* * Add Hops LocalResources paths here * */ //Add it to localResources for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) { localResources.put(entry.getKey(), entry.getValue()); //Append name to classPathBuilder classPathBuilder.append(entry.getKey()); classPathBuilder.append(File.pathSeparator); } //////////////////////////////////////////////////////////////////////////// // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory()); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); paths.add(remotePathJar); classPathBuilder.append("flink.jar").append(File.pathSeparator); paths.add(remotePathConf); classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator); sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); fs.setPermission(sessionFilesDir, permission); // set permission for path. // setup security tokens Utils.setTokensFor(amContainer, paths, conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH and environment variables for ApplicationMaster final Map<String, String> appMasterEnv = new HashMap<>(); // set user specified app master environment variables appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration)); // set Flink app class path appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString()); // set Flink on YARN internal configuration values appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb)); appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots)); appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached)); appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace()); if (dynamicPropertiesEncoded != null) { appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded); } // set classpath from YARN configuration Utils.setupYarnClassPath(conf, appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jobManagerMemoryMb); capability.setVirtualCores(1); String name; if (customName == null) { name = "Flink session with " + taskManagerCount + " TaskManagers"; if (detached) { name += " (detached)"; } } else { name = customName; } appContext.setApplicationName(name); // application name appContext.setApplicationType("Apache Flink"); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); if (yarnQueue != null) { appContext.setQueue(yarnQueue); } // add a hook to clean up in case deployment fails Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication); Runtime.getRuntime().addShutdownHook(deploymentFailureHook); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); LOG.info("Waiting for the cluster to be allocated"); final long startTime = System.currentTimeMillis(); ApplicationReport report; YarnApplicationState lastAppState = YarnApplicationState.NEW; loop: while (true) { try { report = yarnClient.getApplicationReport(appId); } catch (IOException e) { throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage()); } YarnApplicationState appState = report.getYarnApplicationState(); switch (appState) { case FAILED: case FINISHED: case KILLED: throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this " + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId); //break .. case RUNNING: LOG.info("YARN application has been deployed successfully."); break loop; default: if (appState != lastAppState) { LOG.info("Deploying cluster, current state " + appState); } if (System.currentTimeMillis() - startTime > 60000) { LOG.info("Deployment took more than 60 seconds. Please check if the " + "requested resources are available in the YARN cluster"); } } lastAppState = appState; Thread.sleep(250); } // print the application id for user to cancel themselves. if (isDetachedMode()) { LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed."); } // since deployment was successful, remove the hook try { Runtime.getRuntime().removeShutdownHook(deploymentFailureHook); } catch (IllegalStateException e) { // we're already in the shut down hook. } String host = report.getHost(); int port = report.getRpcPort(); // Correctly initialize the Flink config flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host); flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port); // the Flink cluster is deployed in YARN. Represent cluster return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true); }
From source file:io.hops.hopsworks.common.jobs.flink.YarnClusterClient.java
License:Apache License
public ApplicationStatus getApplicationStatus() { if (!isConnected) { throw new IllegalStateException("The cluster has been connected to the ApplicationMaster."); }//from w ww.j av a2s.co m ApplicationReport lastReport = null; if (pollingRunner == null) { LOG.warn("YarnClusterClient.getApplicationStatus() has been called on an uninitialized cluster." + "The system might be in an erroneous state"); } else { lastReport = pollingRunner.getLastReport(); } if (lastReport == null) { LOG.warn("YarnClusterClient.getApplicationStatus() has been called on a " + "cluster that didn't receive a status so far." + "The system might be in an erroneous state"); return ApplicationStatus.UNKNOWN; } else { YarnApplicationState appState = lastReport.getYarnApplicationState(); ApplicationStatus status = (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) ? ApplicationStatus.FAILED : ApplicationStatus.SUCCEEDED; if (status != ApplicationStatus.SUCCEEDED) { LOG.warn("YARN reported application state {}", appState); LOG.warn("Diagnostics: {}", lastReport.getDiagnostics()); } return status; } }
From source file:io.hops.hopsworks.common.jobs.flink.YarnClusterClient.java
License:Apache License
/** * Shuts down the Yarn application//from ww w . j ava 2s .co m */ public void shutdownCluster() { if (hasBeenShutDown.getAndSet(true)) { return; } if (!isConnected) { throw new IllegalStateException("The cluster has been not been connected to the ApplicationMaster."); } try { Runtime.getRuntime().removeShutdownHook(clientShutdownHook); } catch (IllegalStateException e) { // we are already in the shutdown hook } LOG.info("Sending shutdown request to the Application Master"); try { Future<Object> response = Patterns.ask(applicationClient.get(), new YarnMessages.LocalStopYarnSession( getApplicationStatus(), "Flink YARN Client requested shutdown"), new Timeout(akkaDuration)); Await.ready(response, akkaDuration); } catch (Exception e) { LOG.warn("Error while stopping YARN cluster.", e); } try { File propertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(flinkConfig); if (propertiesFile.isFile()) { if (propertiesFile.delete()) { LOG.info("Deleted Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString()); } else { LOG.warn("Couldn't delete Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString()); } } } catch (Exception e) { LOG.warn("Exception while deleting the JobManager address file", e); } if (sessionFilesDir != null) { LOG.info("Deleting files in " + sessionFilesDir); try { FileSystem shutFS = FileSystem.get(hadoopConfig); shutFS.delete(sessionFilesDir, true); // delete conf and jar file. shutFS.close(); } catch (IOException e) { LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e); } } else { LOG.warn("Session file directory not set. Not deleting session files"); } try { pollingRunner.stopRunner(); pollingRunner.join(1000); } catch (InterruptedException e) { LOG.warn("Shutdown of the polling runner was interrupted", e); Thread.currentThread().interrupt(); } try { ApplicationReport appReport = yarnClient.getApplicationReport(appId); LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState() + " and final state " + appReport.getFinalApplicationStatus() + " at " + appReport.getFinishTime()); if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) { LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics()); LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve " + "the full application log using this command:\n" + "\tyarn logs -appReport " + appReport.getApplicationId() + "\n" + "(It sometimes takes a few seconds until the logs are aggregated)"); } } catch (Exception e) { LOG.warn("Couldn't get final report", e); } LOG.info("YARN Client is shutting down"); yarnClient.stop(); // actorRunner is using the yarnClient. yarnClient = null; // set null to clearly see if somebody wants to access it afterwards. }
From source file:io.hops.tensorflow.Client.java
License:Apache License
/** * Monitor the submitted application for completion. * Kill application if time expires./*from ww w. j a va 2 s. c o m*/ * * @param appId * Application Id of application to be monitored * @return true if application completed successfully * @throws YarnException * @throws IOException */ public boolean monitorApplication(ApplicationId appId) throws YarnException, IOException { YarnApplicationState oldState = null; while (true) { // Check app status every 1 second. try { Thread.sleep(1000); } catch (InterruptedException e) { LOG.debug("Thread sleep in monitoring loop interrupted"); } ApplicationReport report = yarnClient.getApplicationReport(appId); YarnApplicationState state = report.getYarnApplicationState(); FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); if (oldState != state) { LOG.info("Got application report from ASM for" + "\n\t appId=" + appId.getId() + "\n\t clientToAMToken=" + report.getClientToAMToken() + "\n\t appDiagnostics=" + report.getDiagnostics() + "\n\t appMasterHost=" + report.getHost() + "\n\t appQueue=" + report.getQueue() + "\n\t appMasterRpcPort=" + report.getRpcPort() + "\n\t appStartTime=" + report.getStartTime() + "\n\t yarnAppState=" + report.getYarnApplicationState().toString() + "\n\t distributedFinalState=" + report.getFinalApplicationStatus().toString() + "\n\t appTrackingUrl=" + report.getTrackingUrl() + "\n\t appUser=" + report.getUser()); oldState = state; } else { LOG.info("Got application report from ASM for " + appId + " (state: " + state + ")"); } if (YarnApplicationState.FINISHED == state) { if (FinalApplicationStatus.SUCCEEDED == dsStatus) { LOG.info("Application has completed successfully. Breaking monitoring loop"); return true; } else { LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) { LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop"); return false; } if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { LOG.info("Reached client specified timeout for application. Killing application"); forceKillApplication(appId); return false; } } }