List of usage examples for org.apache.hadoop.yarn.client.api YarnClient start
@Override public void start()
From source file:org.apache.flink.yarn.AbstractYarnClusterTest.java
License:Apache License
/** * Tests that the cluster retrieval of a finished YARN application fails. *///from w w w .j a va 2 s .c o m @Test(expected = ClusterRetrieveException.class) public void testClusterClientRetrievalOfFinishedYarnApplication() throws Exception { final ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 42); final ApplicationReport applicationReport = createApplicationReport(applicationId, YarnApplicationState.FINISHED, FinalApplicationStatus.SUCCEEDED); final YarnClient yarnClient = new TestingYarnClient( Collections.singletonMap(applicationId, applicationReport)); final YarnConfiguration yarnConfiguration = new YarnConfiguration(); yarnClient.init(yarnConfiguration); yarnClient.start(); final TestingAbstractYarnClusterDescriptor clusterDescriptor = new TestingAbstractYarnClusterDescriptor( new Configuration(), yarnConfiguration, temporaryFolder.newFolder().getAbsolutePath(), yarnClient, false); try { clusterDescriptor.retrieve(applicationId); } finally { clusterDescriptor.close(); } }
From source file:org.apache.flink.yarn.Client.java
License:Apache License
public void run(String[] args) throws Exception { if (UserGroupInformation.isSecurityEnabled()) { throw new RuntimeException("Flink YARN client does not have security support right now." + "File a bug, we will fix it asap"); }/* w w w.j a va2s .c o m*/ //Utils.logFilesInCurrentDirectory(LOG); // // Command Line Options // Options options = new Options(); options.addOption(VERBOSE); options.addOption(FLINK_CONF_DIR); options.addOption(FLINK_JAR); options.addOption(JM_MEMORY); options.addOption(TM_MEMORY); options.addOption(TM_CORES); options.addOption(CONTAINER); options.addOption(GEN_CONF); options.addOption(QUEUE); options.addOption(QUERY); options.addOption(SHIP_PATH); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (MissingOptionException moe) { System.out.println(moe.getMessage()); printUsage(); System.exit(1); } if (System.getProperty("log4j.configuration") == null) { Logger root = Logger.getRootLogger(); root.removeAllAppenders(); PatternLayout layout = new PatternLayout("%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n"); ConsoleAppender appender = new ConsoleAppender(layout, "System.err"); root.addAppender(appender); if (cmd.hasOption(VERBOSE.getOpt())) { root.setLevel(Level.DEBUG); LOG.debug("CLASSPATH: " + System.getProperty("java.class.path")); } else { root.setLevel(Level.INFO); } } // Jar Path Path localJarPath; if (cmd.hasOption(FLINK_JAR.getOpt())) { String userPath = cmd.getOptionValue(FLINK_JAR.getOpt()); if (!userPath.startsWith("file://")) { userPath = "file://" + userPath; } localJarPath = new Path(userPath); } else { localJarPath = new Path( "file://" + Client.class.getProtectionDomain().getCodeSource().getLocation().getPath()); } if (cmd.hasOption(GEN_CONF.getOpt())) { LOG.info("Placing default configuration in current directory"); File outFile = generateDefaultConf(localJarPath); LOG.info("File written to " + outFile.getAbsolutePath()); System.exit(0); } // Conf Path Path confPath = 
null; String confDirPath = ""; if (cmd.hasOption(FLINK_CONF_DIR.getOpt())) { confDirPath = cmd.getOptionValue(FLINK_CONF_DIR.getOpt()) + "/"; File confFile = new File(confDirPath + CONFIG_FILE_NAME); if (!confFile.exists()) { LOG.fatal("Unable to locate configuration file in " + confFile); System.exit(1); } confPath = new Path(confFile.getAbsolutePath()); } else { System.out.println("No configuration file has been specified"); // no configuration path given. // -> see if there is one in the current directory File currDir = new File("."); File[] candidates = currDir.listFiles(new FilenameFilter() { @Override public boolean accept(final File dir, final String name) { return name != null && name.endsWith(".yaml"); } }); if (candidates == null || candidates.length == 0) { System.out.println( "No configuration file has been found in current directory.\n" + "Copying default."); File outFile = generateDefaultConf(localJarPath); confPath = new Path(outFile.toURI()); } else { if (candidates.length > 1) { System.out.println("Multiple .yaml configuration files were found in the current directory\n" + "Please specify one explicitly"); System.exit(1); } else if (candidates.length == 1) { confPath = new Path(candidates[0].toURI()); } } } List<File> shipFiles = new ArrayList<File>(); // path to directory to ship if (cmd.hasOption(SHIP_PATH.getOpt())) { String shipPath = cmd.getOptionValue(SHIP_PATH.getOpt()); File shipDir = new File(shipPath); if (shipDir.isDirectory()) { shipFiles = new ArrayList<File>(Arrays.asList(shipDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return !(name.equals(".") || name.equals("..")); } }))); } else { LOG.warn("Ship directory is not a directory!"); } } boolean hasLog4j = false; //check if there is a log4j file if (confDirPath.length() > 0) { File l4j = new File(confDirPath + "/log4j.properties"); if (l4j.exists()) { shipFiles.add(l4j); hasLog4j = true; } } // queue String queue = "default"; if 
(cmd.hasOption(QUEUE.getOpt())) { queue = cmd.getOptionValue(QUEUE.getOpt()); } // JobManager Memory int jmMemory = 512; if (cmd.hasOption(JM_MEMORY.getOpt())) { jmMemory = Integer.valueOf(cmd.getOptionValue(JM_MEMORY.getOpt())); } // Task Managers memory int tmMemory = 1024; if (cmd.hasOption(TM_MEMORY.getOpt())) { tmMemory = Integer.valueOf(cmd.getOptionValue(TM_MEMORY.getOpt())); } // Task Managers vcores int tmCores = 1; if (cmd.hasOption(TM_CORES.getOpt())) { tmCores = Integer.valueOf(cmd.getOptionValue(TM_CORES.getOpt())); } Utils.getFlinkConfiguration(confPath.toUri().getPath()); int jmPort = GlobalConfiguration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 0); if (jmPort == 0) { LOG.warn("Unable to find job manager port in configuration!"); jmPort = ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT; } conf = Utils.initializeYarnConfiguration(); // intialize HDFS LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path final FileSystem fs = FileSystem.get(conf); if (fs.getScheme().startsWith("file")) { LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values." 
+ "The Flink YARN client needs to store its files in a distributed file system"); } // Create yarnClient final YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Query cluster for metrics if (cmd.hasOption(QUERY.getOpt())) { showClusterMetrics(yarnClient); } if (!cmd.hasOption(CONTAINER.getOpt())) { LOG.fatal("Missing required argument " + CONTAINER.getOpt()); printUsage(); yarnClient.stop(); System.exit(1); } // TM Count final int taskManagerCount = Integer.valueOf(cmd.getOptionValue(CONTAINER.getOpt())); System.out.println("Using values:"); System.out.println("\tContainer Count = " + taskManagerCount); System.out.println("\tJar Path = " + localJarPath.toUri().getPath()); System.out.println("\tConfiguration file = " + confPath.toUri().getPath()); System.out.println("\tJobManager memory = " + jmMemory); System.out.println("\tTaskManager memory = " + tmMemory); System.out.println("\tTaskManager cores = " + tmCores); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); Resource maxRes = appResponse.getMaximumResourceCapability(); if (tmMemory > maxRes.getMemory() || tmCores > maxRes.getVirtualCores()) { LOG.fatal("The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + ", Maximum Cores: " + tmCores); yarnClient.stop(); System.exit(1); } if (jmMemory > maxRes.getMemory()) { LOG.fatal("The cluster does not have the requested resources for the JobManager available!\n" + "Maximum Memory: " + maxRes.getMemory()); yarnClient.stop(); System.exit(1); } int totalMemoryRequired = jmMemory + tmMemory * taskManagerCount; ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient); if (freeClusterMem.totalFreeMemory < totalMemoryRequired) { LOG.fatal("This YARN session requires " + totalMemoryRequired + "MB of memory 
in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available."); yarnClient.stop(); System.exit(1); } if (tmMemory > freeClusterMem.containerLimit) { LOG.fatal("The requested amount of memory for the TaskManagers (" + tmMemory + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit); yarnClient.stop(); System.exit(1); } if (jmMemory > freeClusterMem.containerLimit) { LOG.fatal("The requested amount of memory for the JobManager (" + jmMemory + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit); yarnClient.stop(); System.exit(1); } // respect custom JVM options in the YAML file final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, ""); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jmMemory) + "M " + javaOpts; if (hasLog4j) { amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties"; } amCommand += " org.apache.flink.yarn.ApplicationMaster" + " " + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log"; amContainer.setCommands(Collections.singletonList(amCommand)); System.err.println("amCommand=" + amCommand); // Set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); final ApplicationId appId = appContext.getApplicationId(); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); LocalResource flinkConf = Records.newRecord(LocalResource.class); Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), 
localJarPath, appMasterJar, fs.getHomeDirectory()); Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), confPath, flinkConf, fs.getHomeDirectory()); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2); localResources.put("flink.jar", appMasterJar); localResources.put("flink-conf.yaml", flinkConf); // setup security tokens (code from apache storm) final Path[] paths = new Path[3 + shipFiles.size()]; StringBuffer envShipFileList = new StringBuffer(); // upload ship files for (int i = 0; i < shipFiles.size(); i++) { File shipFile = shipFiles.get(i); LocalResource shipResources = Records.newRecord(LocalResource.class); Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath()); paths[3 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory()); localResources.put(shipFile.getName(), shipResources); envShipFileList.append(paths[3 + i]); if (i + 1 < shipFiles.size()) { envShipFileList.append(','); } } paths[0] = remotePathJar; paths[1] = remotePathConf; paths[2] = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/"); FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL); fs.setPermission(paths[2], permission); // set permission for path. 
Utils.setTokensFor(amContainer, paths, this.conf); amContainer.setLocalResources(localResources); fs.close(); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); Utils.setupEnv(conf, appMasterEnv); // set configuration values appMasterEnv.put(Client.ENV_TM_COUNT, String.valueOf(taskManagerCount)); appMasterEnv.put(Client.ENV_TM_CORES, String.valueOf(tmCores)); appMasterEnv.put(Client.ENV_TM_MEMORY, String.valueOf(tmMemory)); appMasterEnv.put(Client.FLINK_JAR_PATH, remotePathJar.toString()); appMasterEnv.put(Client.ENV_APP_ID, appId.toString()); appMasterEnv.put(Client.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString()); appMasterEnv.put(Client.ENV_CLIENT_SHIP_FILES, envShipFileList.toString()); appMasterEnv.put(Client.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName()); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(jmMemory); capability.setVirtualCores(1); appContext.setApplicationName("Flink"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue(queue); // file that we write into the conf/ dir containing the jobManager address. final File addrFile = new File(confDirPath + CliFrontend.JOBMANAGER_ADDRESS_FILE); Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { try { LOG.info("Killing the Flink-YARN application."); yarnClient.killApplication(appId); LOG.info("Deleting files in " + paths[2]); FileSystem shutFS = FileSystem.get(conf); shutFS.delete(paths[2], true); // delete conf and jar file. 
shutFS.close(); } catch (Exception e) { LOG.warn("Exception while killing the YARN application", e); } try { addrFile.delete(); } catch (Exception e) { LOG.warn("Exception while deleting the jobmanager address file", e); } LOG.info("YARN Client is shutting down"); yarnClient.stop(); } }); LOG.info("Submitting application master " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); boolean told = false; char[] el = { '/', '|', '\\', '-' }; int i = 0; while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { if (!told && appState == YarnApplicationState.RUNNING) { System.err.println("Flink JobManager is now running on " + appReport.getHost() + ":" + jmPort); System.err.println("JobManager Web Interface: " + appReport.getTrackingUrl()); // write jobmanager connect information PrintWriter out = new PrintWriter(addrFile); out.println(appReport.getHost() + ":" + jmPort); out.close(); addrFile.setReadable(true, false); // readable for all. told = true; } if (!told) { System.err.print(el[i++] + "\r"); if (i == el.length) { i = 0; } Thread.sleep(500); // wait for the application to switch to RUNNING } else { Thread.sleep(5000); } appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } LOG.info("Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); if (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) { LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics()); } }
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
/** * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213). *//*from w ww. ja va 2s . c o m*/ @Test(timeout = 100000) // timeout after 100 seconds public void testTaskManagerFailure() { LOG.info("Starting testTaskManagerFailure()"); Runner runner = startWithArgs( new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly! "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" }, "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION); Assert.assertEquals(2, getRunningContainers()); // ------------------------ Test if JobManager web interface is accessible ------- YarnClient yc = null; try { yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); Assert.assertEquals(1, apps.size()); // Only one running ApplicationReport app = apps.get(0); Assert.assertEquals("customName", app.getName()); String url = app.getTrackingUrl(); if (!url.endsWith("/")) { url += "/"; } if (!url.startsWith("http://")) { url = "http://" + url; } LOG.info("Got application URL from YARN {}", url); String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/"); JsonNode parsedTMs = new ObjectMapper().readTree(response); ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers"); Assert.assertNotNull(taskManagers); Assert.assertEquals(1, taskManagers.size()); Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt()); // get the configuration from webinterface & check if the dynamic properties from YARN show up there. 
String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config"); Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig); Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value")); Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers")); Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES)); // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface // first, get the hostname/port String oC = outContent.toString(); Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)"); Matcher matches = p.matcher(oC); String hostname = null; String port = null; while (matches.find()) { hostname = matches.group(1).toLowerCase(); port = matches.group(2); } LOG.info("Extracted hostname:port: {} {}", hostname, port); Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)); Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY)); // test logfile access String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log"); Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster")); Assert.assertTrue(logs.contains("Starting JobManager")); Assert.assertTrue(logs.contains("Starting JobManager Web Frontend")); } catch (Throwable e) { LOG.warn("Error while running test", e); Assert.fail(e.getMessage()); } // ------------------------ Kill container with TaskManager and check if vcores are set correctly ------- // find container id of taskManager: ContainerId taskManagerContainer = null; NodeManager nodeManager = null; UserGroupInformation remoteUgi = null; NMTokenIdentifier nmIdent = null; try { remoteUgi = UserGroupInformation.getCurrentUser(); } catch (IOException e) { LOG.warn("Unable to get curr user", e); Assert.fail(); } for (int nmId = 0; nmId < NUM_NODEMANAGERS; 
nmId++) { NodeManager nm = yarnCluster.getNodeManager(nmId); ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers(); for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) { String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands()); if (command.contains(YarnTaskManager.class.getSimpleName())) { taskManagerContainer = entry.getKey(); nodeManager = nm; nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0); // allow myself to do stuff with the container // remoteUgi.addCredentials(entry.getValue().getCredentials()); remoteUgi.addTokenIdentifier(nmIdent); } } sleep(500); } Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer); Assert.assertNotNull("Illegal state", nodeManager); try { List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING); // we asked for one node with 2 vcores so we expect 2 vcores int userVcores = 0; for (NodeReport rep : nodeReports) { userVcores += rep.getUsed().getVirtualCores(); } Assert.assertEquals(2, userVcores); } catch (Exception e) { Assert.fail("Test failed: " + e.getMessage()); } yc.stop(); List<ContainerId> toStop = new LinkedList<ContainerId>(); toStop.add(taskManagerContainer); StopContainersRequest scr = StopContainersRequest.newInstance(toStop); try { nodeManager.getNMContext().getContainerManager().stopContainers(scr); } catch (Throwable e) { LOG.warn("Error stopping container", e); Assert.fail("Error stopping container: " + e.getMessage()); } // stateful termination check: // wait until we saw a container being killed and AFTERWARDS a new one launched boolean ok = false; do { LOG.debug("Waiting for correct order of events. 
Output: {}", errContent.toString()); String o = errContent.toString(); int killedOff = o.indexOf("Container killed by the ApplicationMaster"); if (killedOff != -1) { o = o.substring(killedOff); ok = o.indexOf("Launching TaskManager") > 0; } sleep(1000); } while (!ok); // send "stop" command to command line interface runner.sendStop(); // wait for the thread to stop try { runner.join(1000); } catch (InterruptedException e) { LOG.warn("Interrupted while stopping runner", e); } LOG.warn("stopped"); // ----------- Send output to logger System.setOut(originalStdout); System.setErr(originalStderr); String oC = outContent.toString(); String eC = errContent.toString(); LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC); LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC); // ------ Check if everything happened correctly Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster")); Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster")); Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host")); // cleanup auth for the subsequent tests. remoteUgi.getTokenIdentifiers().remove(nmIdent); LOG.info("Finished testTaskManagerFailure()"); }
From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java
License:Apache License
private void testDetachedPerJobYarnClusterInternal(String job) { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration);/* ww w . ja v a 2 s . c o m*/ yc.start(); // get temporary folder for writing output of wordcount example File tmpOutFolder = null; try { tmpOutFolder = tmp.newFolder(); } catch (IOException e) { throw new RuntimeException(e); } // get temporary file for reading input data for wordcount example File tmpInFile; try { tmpInFile = tmp.newFile(); FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT); } catch (IOException e) { throw new RuntimeException(e); } Runner runner = startWithArgs( new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt", flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD", "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly "-ytm", "1024", "-ys", "2", // test requesting slots from YARN. "--yarndetached", job, "--input", tmpInFile.getAbsoluteFile().toString(), "--output", tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND); // it should usually be 2, but on slow machines, the number varies Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2); // give the runner some time to detach for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) { try { Thread.sleep(500); } catch (InterruptedException e) { } } Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("CLI Frontend has returned, so the job is running"); // find out the application id and wait until it has finished. try { List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); ApplicationId tmpAppId; if (apps.size() == 1) { // Better method to find the right appId. 
But sometimes the app is shutting down very fast // Only one running tmpAppId = apps.get(0).getApplicationId(); LOG.info("waiting for the job with appId {} to finish", tmpAppId); // wait until the app has finished while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) { sleep(500); } } else { // get appId by finding the latest finished appid apps = yc.getApplications(); Collections.sort(apps, new Comparator<ApplicationReport>() { @Override public int compare(ApplicationReport o1, ApplicationReport o2) { return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1; } }); tmpAppId = apps.get(0).getApplicationId(); LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray())); } final ApplicationId id = tmpAppId; // now it has finished. // check the output files. File[] listOfOutputFiles = tmpOutFolder.listFiles(); Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles); LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder); // read all output files in output folder to one output string String content = ""; for (File f : listOfOutputFiles) { if (f.isFile()) { content += FileUtils.readFileToString(f) + "\n"; } } //String content = FileUtils.readFileToString(taskmanagerOut); // check for some of the wordcount outputs. 
Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)")); Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)")); // check if the heap size for the TaskManager was set correctly File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString()); } }); Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog); content = FileUtils.readFileToString(jobmanagerLog); // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE) String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m"; Assert.assertTrue( "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected)); expected = " (2/2) (attempt #0) to "; Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected)); // make sure the detached app is really finished. LOG.info("Checking again that app has finished"); ApplicationReport rep; do { sleep(500); rep = yc.getApplicationReport(id); LOG.info("Got report {}", rep); } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING); } catch (Throwable t) { LOG.warn("Error while detached yarn session was running", t); Assert.fail(t.getMessage()); } }
From source file:org.apache.flink.yarn.YARNSessionFIFOITCase.java
License:Apache License
/** * Test regular operation, including command line parameter parsing. *//*from w w w .j a va2 s. co m*/ @Test(timeout = 60000) // timeout after a minute. public void testDetachedMode() { LOG.info("Starting testDetachedMode()"); addTestAppender(FlinkYarnSessionCli.class, Level.INFO); Runner runner = startWithArgs(new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "--name", "MyCustomName", // test setting a custom name "--detached" }, "Flink JobManager is now running on", RunTypes.YARN_SESSION); checkForLogString("The Flink YARN client has been started in detached mode"); Assert.assertFalse("The runner should detach.", runner.isAlive()); LOG.info("Waiting until two containers are running"); // wait until two containers are running while (getRunningContainers() < 2) { sleep(500); } LOG.info("Two containers are running. Killing the application"); // kill application "externally". try { YarnClient yc = YarnClient.createYarnClient(); yc.init(yarnConfiguration); yc.start(); List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)); Assert.assertEquals(1, apps.size()); // Only one running ApplicationReport app = apps.get(0); Assert.assertEquals("MyCustomName", app.getName()); ApplicationId id = app.getApplicationId(); yc.killApplication(id); while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) { sleep(500); } } catch (Throwable t) { LOG.warn("Killing failed", t); Assert.fail(); } LOG.info("Finished testDetachedMode()"); }
From source file:org.apache.hive.service.server.KillQueryImpl.java
License:Apache License
public static void killChildYarnJobs(Configuration conf, String tag) { try {//from w ww .java 2 s . c o m if (tag == null) { return; } LOG.info("Killing yarn jobs using query tag:" + tag); Set<ApplicationId> childYarnJobs = getChildYarnJobs(conf, tag); if (!childYarnJobs.isEmpty()) { YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); for (ApplicationId app : childYarnJobs) { yarnClient.killApplication(app); } } } catch (IOException | YarnException ye) { LOG.warn("Exception occurred while killing child job({})", ye); } }
From source file:org.apache.ignite.yarn.IgniteYarnClient.java
License:Apache License
/** * Main methods has one mandatory parameter and one optional parameter. * * @param args Path to jar mandatory parameter and property file is optional. */// ww w .j a v a 2s. co m public static void main(String[] args) throws Exception { checkArguments(args); // Set path to app master jar. String pathAppMasterJar = args[0]; ClusterProperties props = ClusterProperties.from(args.length == 2 ? args[1] : null); YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); FileSystem fs = FileSystem.get(conf); Path ignite; // Load ignite and jar if (props.ignitePath() == null) ignite = getIgnite(props, fs); else ignite = new Path(props.ignitePath()); // Upload the jar file to HDFS. Path appJar = IgniteYarnUtils.copyLocalToHdfs(fs, pathAppMasterJar, props.igniteWorkDir() + File.separator + IgniteYarnUtils.JAR_NAME); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands(Collections .singletonList(Environment.JAVA_HOME.$() + "/bin/java -Xmx512m " + ApplicationMaster.class.getName() + IgniteYarnUtils.SPACE + ignite.toUri() + IgniteYarnUtils.YARN_LOG_OUT)); // Setup jar for ApplicationMaster LocalResource appMasterJar = IgniteYarnUtils.setupFile(appJar, fs, LocalResourceType.FILE); amContainer.setLocalResources(Collections.singletonMap(IgniteYarnUtils.JAR_NAME, appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = props.toEnvs(); setupAppMasterEnv(appMasterEnv, conf); amContainer.setEnvironment(appMasterEnv); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials creds = new Credentials(); String tokRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokRenewer == null || tokRenewer.length() == 0) throw 
new IOException("Master Kerberos principal for the RM is not set."); log.info("Found RM principal: " + tokRenewer); final Token<?> tokens[] = fs.addDelegationTokens(tokRenewer, creds); if (tokens != null) log.info("File system delegation tokens: " + Arrays.toString(tokens)); amContainer.setTokens(IgniteYarnUtils.createTokenBuffer(creds)); } // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(512); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("ignition"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); yarnClient.submitApplication(appContext); log.log(Level.INFO, "Submitted application. Application id: {0}", appId); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED) { TimeUnit.SECONDS.sleep(1L); appReport = yarnClient.getApplicationReport(appId); if (appState != YarnApplicationState.ACCEPTED && appReport.getYarnApplicationState() == YarnApplicationState.ACCEPTED) log.log(Level.INFO, "Application {0} is ACCEPTED.", appId); appState = appReport.getYarnApplicationState(); } log.log(Level.INFO, "Application {0} is {1}.", new Object[] { appId, appState }); }
From source file:org.apache.metron.maas.service.MaasIntegrationTest.java
License:Apache License
public void testDSShell(boolean haveDomain) throws Exception { MaaSConfig config = new MaaSConfig() { {//from w w w . j ava2 s . c om setServiceRoot("/maas/service"); setQueueConfig(new HashMap<String, Object>() { { put(ZKQueue.ZK_PATH, "/maas/queue"); } }); } }; String configRoot = "/maas/config"; byte[] configData = ConfigUtil.INSTANCE.toBytes(config); try { client.setData().forPath(configRoot, configData); } catch (KeeperException.NoNodeException e) { client.create().creatingParentsIfNeeded().forPath(configRoot, configData); } String[] args = { "--jar", yarnComponent.getAppMasterJar(), "--zk_quorum", zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--master_memory", "512", "--master_vcores", "2", }; if (haveDomain) { String[] domainArgs = { "--domain", "TEST_DOMAIN", "--view_acls", "reader_user reader_group", "--modify_acls", "writer_user writer_group", "--create" }; List<String> argsList = new ArrayList<String>(Arrays.asList(args)); argsList.addAll(Arrays.asList(domainArgs)); args = argsList.toArray(new String[argsList.size()]); } YarnConfiguration conf = yarnComponent.getConfig(); LOG.info("Initializing DS Client"); final Client client = new Client(new Configuration(conf)); boolean initSuccess = client.init(args); Assert.assertTrue(initSuccess); LOG.info("Running DS Client"); final AtomicBoolean result = new AtomicBoolean(false); Thread t = new Thread() { @Override public void run() { try { result.set(client.run()); } catch (Exception e) { throw new RuntimeException(e); } } }; t.start(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(new Configuration(conf)); yarnClient.start(); String hostName = NetUtils.getHostname(); boolean verified = false; String errorMessage = ""; while (!verified) { List<ApplicationReport> apps = yarnClient.getApplications(); if (apps.size() == 0) { Thread.sleep(10); continue; } ApplicationReport appReport = apps.get(0); if (appReport.getHost().equals("N/A")) { Thread.sleep(10); continue; } 
errorMessage = "Expected host name to start with '" + hostName + "', was '" + appReport.getHost() + "'. Expected rpc port to be '-1', was '" + appReport.getRpcPort() + "'."; if (checkHostname(appReport.getHost()) && appReport.getRpcPort() == -1) { verified = true; } if (appReport.getYarnApplicationState() == YarnApplicationState.FINISHED) { break; } } Assert.assertTrue(errorMessage, verified); FileSystem fs = FileSystem.get(conf); try { new ModelSubmission().execute(FileSystem.get(conf), new String[] { "--name", "dummy", "--version", "1.0", "--zk_quorum", zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--local_model_path", "src/test/resources/maas", "--hdfs_model_path", new Path(fs.getHomeDirectory(), "maas/dummy").toString(), "--num_instances", "1", "--memory", "100", "--mode", "ADD", "--log4j", "src/test/resources/log4j.properties" }); ServiceDiscoverer discoverer = new ServiceDiscoverer(this.client, config.getServiceRoot()); discoverer.start(); { boolean passed = false; for (int i = 0; i < 100; ++i) { try { List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", "1.0")); if (endpoints != null && endpoints.size() == 1) { LOG.trace("Found endpoints: " + endpoints.get(0)); String output = makeRESTcall( new URL(endpoints.get(0).getEndpoint().getUrl() + "/echo/casey")); if (output.contains("casey")) { passed = true; break; } } } catch (Exception e) { } Thread.sleep(2000); } Assert.assertTrue(passed); } { List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", "1.0")); Assert.assertNotNull(endpoints); Assert.assertEquals(1, endpoints.size()); } new ModelSubmission().execute(FileSystem.get(conf), new String[] { "--name", "dummy", "--version", "1.0", "--zk_quorum", zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--num_instances", "1", "--mode", "REMOVE", }); { boolean passed = false; for (int i = 0; i < 100; ++i) { try { List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", 
"1.0")); //ensure that the endpoint is dead. if (endpoints == null || endpoints.size() == 0) { passed = true; break; } } catch (Exception e) { } Thread.sleep(2000); } Assert.assertTrue(passed); } } finally { cleanup(); } }
From source file:org.apache.oozie.action.hadoop.LauncherMainHadoopUtils.java
License:Apache License
/**
 * Kills every YARN application returned by {@code getChildYarnJobs(actionConf)},
 * printing progress to standard output. Does nothing when no child jobs are found.
 *
 * @param actionConf Configuration used to look up the child jobs and to initialise the YARN client.
 * @throws RuntimeException Wrapping the {@link YarnException} or {@link IOException} raised
 *                          while looking up or killing the jobs.
 */
public static void killChildYarnJobs(Configuration actionConf) {
    try {
        Set<ApplicationId> childYarnJobs = getChildYarnJobs(actionConf);
        if (!childYarnJobs.isEmpty()) {
            System.out.println();
            System.out.println("Found [" + childYarnJobs.size() + "] Map-Reduce jobs from this launcher");
            System.out.println("Killing existing jobs and starting over:");
            YarnClient yarnClient = YarnClient.createYarnClient();
            yarnClient.init(actionConf);
            yarnClient.start();
            // The client is a started service: stop it whether or not a kill request fails.
            try {
                for (ApplicationId app : childYarnJobs) {
                    System.out.print("Killing job [" + app + "] ... ");
                    yarnClient.killApplication(app);
                    System.out.println("Done");
                }
            } finally {
                yarnClient.stop();
            }
            System.out.println();
        }
    } catch (YarnException ye) {
        throw new RuntimeException("Exception occurred while killing child job(s)", ye);
    } catch (IOException ioe) {
        throw new RuntimeException("Exception occurred while killing child job(s)", ioe);
    }
}
From source file:org.apache.reef.runtime.yarn.driver.unmanaged.UnmanagedAmTest.java
License:Apache License
@Test public void testAmShutdown() throws IOException, YarnException { Assume.assumeTrue("This test requires a YARN Resource Manager to connect to", Boolean.parseBoolean(System.getenv("REEF_TEST_YARN"))); final YarnConfiguration yarnConfig = new YarnConfiguration(); // Start YARN client and register the application final YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(yarnConfig);//from ww w.j a v a 2 s. c o m yarnClient.start(); final ContainerLaunchContext containerContext = Records.newRecord(ContainerLaunchContext.class); containerContext.setCommands(Collections.<String>emptyList()); containerContext.setLocalResources(Collections.<String, LocalResource>emptyMap()); containerContext.setEnvironment(Collections.<String, String>emptyMap()); containerContext.setTokens(getTokens()); final ApplicationSubmissionContext appContext = yarnClient.createApplication() .getApplicationSubmissionContext(); appContext.setApplicationName("REEF_Unmanaged_AM_Test"); appContext.setAMContainerSpec(containerContext); appContext.setUnmanagedAM(true); appContext.setQueue("default"); final ApplicationId applicationId = appContext.getApplicationId(); LOG.log(Level.INFO, "Registered YARN application: {0}", applicationId); yarnClient.submitApplication(appContext); LOG.log(Level.INFO, "YARN application submitted: {0}", applicationId); addToken(yarnClient.getAMRMToken(applicationId)); // Start the AM final AMRMClientAsync<AMRMClient.ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(1000, this); rmClient.init(yarnConfig); rmClient.start(); final NMClientAsync nmClient = new NMClientAsyncImpl(this); nmClient.init(yarnConfig); nmClient.start(); final RegisterApplicationMasterResponse registration = rmClient .registerApplicationMaster(NetUtils.getHostname(), -1, null); LOG.log(Level.INFO, "Unmanaged AM is running: {0}", registration); rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "Success!", null); LOG.log(Level.INFO, "Unregistering 
AM: state {0}", rmClient.getServiceState()); // Shutdown the AM rmClient.stop(); nmClient.stop(); // Get the final application report final ApplicationReport appReport = yarnClient.getApplicationReport(applicationId); final YarnApplicationState appState = appReport.getYarnApplicationState(); final FinalApplicationStatus finalAttemptStatus = appReport.getFinalApplicationStatus(); LOG.log(Level.INFO, "Application {0} final attempt {1} status: {2}/{3}", new Object[] { applicationId, appReport.getCurrentApplicationAttemptId(), appState, finalAttemptStatus }); Assert.assertEquals("Application must be in FINISHED state", YarnApplicationState.FINISHED, appState); Assert.assertEquals("Final status must be SUCCEEDED", FinalApplicationStatus.SUCCEEDED, finalAttemptStatus); // Shutdown YARN client yarnClient.stop(); }