Example usage for org.apache.hadoop.yarn.client.api YarnClient start

List of usage examples for org.apache.hadoop.yarn.client.api YarnClient start

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.client.api YarnClient start.

Prototype

@Override
public void start() 

Source Link

Usage

From source file:org.apache.flink.yarn.AbstractYarnClusterTest.java

License:Apache License

/**
 * Tests that the cluster retrieval of a finished YARN application fails.
 *///from   w  w w .j  a  va  2 s  .c o m
@Test(expected = ClusterRetrieveException.class)
public void testClusterClientRetrievalOfFinishedYarnApplication() throws Exception {
    final ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 42);
    final ApplicationReport applicationReport = createApplicationReport(applicationId,
            YarnApplicationState.FINISHED, FinalApplicationStatus.SUCCEEDED);

    final YarnClient yarnClient = new TestingYarnClient(
            Collections.singletonMap(applicationId, applicationReport));
    final YarnConfiguration yarnConfiguration = new YarnConfiguration();
    yarnClient.init(yarnConfiguration);
    yarnClient.start();

    final TestingAbstractYarnClusterDescriptor clusterDescriptor = new TestingAbstractYarnClusterDescriptor(
            new Configuration(), yarnConfiguration, temporaryFolder.newFolder().getAbsolutePath(), yarnClient,
            false);

    try {
        clusterDescriptor.retrieve(applicationId);
    } finally {
        clusterDescriptor.close();
    }
}

From source file:org.apache.flink.yarn.Client.java

License:Apache License

public void run(String[] args) throws Exception {

    if (UserGroupInformation.isSecurityEnabled()) {
        throw new RuntimeException("Flink YARN client does not have security support right now."
                + "File a bug, we will fix it asap");
    }/* w  w  w.j a  va2s  .c o m*/
    //Utils.logFilesInCurrentDirectory(LOG);
    //
    //   Command Line Options
    //
    Options options = new Options();
    options.addOption(VERBOSE);
    options.addOption(FLINK_CONF_DIR);
    options.addOption(FLINK_JAR);
    options.addOption(JM_MEMORY);
    options.addOption(TM_MEMORY);
    options.addOption(TM_CORES);
    options.addOption(CONTAINER);
    options.addOption(GEN_CONF);
    options.addOption(QUEUE);
    options.addOption(QUERY);
    options.addOption(SHIP_PATH);

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (MissingOptionException moe) {
        System.out.println(moe.getMessage());
        printUsage();
        System.exit(1);
    }

    if (System.getProperty("log4j.configuration") == null) {
        Logger root = Logger.getRootLogger();
        root.removeAllAppenders();
        PatternLayout layout = new PatternLayout("%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n");
        ConsoleAppender appender = new ConsoleAppender(layout, "System.err");
        root.addAppender(appender);
        if (cmd.hasOption(VERBOSE.getOpt())) {
            root.setLevel(Level.DEBUG);
            LOG.debug("CLASSPATH: " + System.getProperty("java.class.path"));
        } else {
            root.setLevel(Level.INFO);
        }
    }

    // Jar Path
    Path localJarPath;
    if (cmd.hasOption(FLINK_JAR.getOpt())) {
        String userPath = cmd.getOptionValue(FLINK_JAR.getOpt());
        if (!userPath.startsWith("file://")) {
            userPath = "file://" + userPath;
        }
        localJarPath = new Path(userPath);
    } else {
        localJarPath = new Path(
                "file://" + Client.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    }

    if (cmd.hasOption(GEN_CONF.getOpt())) {
        LOG.info("Placing default configuration in current directory");
        File outFile = generateDefaultConf(localJarPath);
        LOG.info("File written to " + outFile.getAbsolutePath());
        System.exit(0);
    }

    // Conf Path 
    Path confPath = null;
    String confDirPath = "";
    if (cmd.hasOption(FLINK_CONF_DIR.getOpt())) {
        confDirPath = cmd.getOptionValue(FLINK_CONF_DIR.getOpt()) + "/";
        File confFile = new File(confDirPath + CONFIG_FILE_NAME);
        if (!confFile.exists()) {
            LOG.fatal("Unable to locate configuration file in " + confFile);
            System.exit(1);
        }
        confPath = new Path(confFile.getAbsolutePath());
    } else {
        System.out.println("No configuration file has been specified");

        // no configuration path given.
        // -> see if there is one in the current directory
        File currDir = new File(".");
        File[] candidates = currDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(final File dir, final String name) {
                return name != null && name.endsWith(".yaml");
            }
        });
        if (candidates == null || candidates.length == 0) {
            System.out.println(
                    "No configuration file has been found in current directory.\n" + "Copying default.");
            File outFile = generateDefaultConf(localJarPath);
            confPath = new Path(outFile.toURI());
        } else {
            if (candidates.length > 1) {
                System.out.println("Multiple .yaml configuration files were found in the current directory\n"
                        + "Please specify one explicitly");
                System.exit(1);
            } else if (candidates.length == 1) {
                confPath = new Path(candidates[0].toURI());
            }
        }
    }
    List<File> shipFiles = new ArrayList<File>();
    // path to directory to ship
    if (cmd.hasOption(SHIP_PATH.getOpt())) {
        String shipPath = cmd.getOptionValue(SHIP_PATH.getOpt());
        File shipDir = new File(shipPath);
        if (shipDir.isDirectory()) {
            shipFiles = new ArrayList<File>(Arrays.asList(shipDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return !(name.equals(".") || name.equals(".."));
                }
            })));
        } else {
            LOG.warn("Ship directory is not a directory!");
        }
    }
    boolean hasLog4j = false;
    //check if there is a log4j file
    if (confDirPath.length() > 0) {
        File l4j = new File(confDirPath + "/log4j.properties");
        if (l4j.exists()) {
            shipFiles.add(l4j);
            hasLog4j = true;
        }
    }

    // queue
    String queue = "default";
    if (cmd.hasOption(QUEUE.getOpt())) {
        queue = cmd.getOptionValue(QUEUE.getOpt());
    }

    // JobManager Memory
    int jmMemory = 512;
    if (cmd.hasOption(JM_MEMORY.getOpt())) {
        jmMemory = Integer.valueOf(cmd.getOptionValue(JM_MEMORY.getOpt()));
    }

    // Task Managers memory
    int tmMemory = 1024;
    if (cmd.hasOption(TM_MEMORY.getOpt())) {
        tmMemory = Integer.valueOf(cmd.getOptionValue(TM_MEMORY.getOpt()));
    }

    // Task Managers vcores
    int tmCores = 1;
    if (cmd.hasOption(TM_CORES.getOpt())) {
        tmCores = Integer.valueOf(cmd.getOptionValue(TM_CORES.getOpt()));
    }
    Utils.getFlinkConfiguration(confPath.toUri().getPath());
    int jmPort = GlobalConfiguration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 0);
    if (jmPort == 0) {
        LOG.warn("Unable to find job manager port in configuration!");
        jmPort = ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT;
    }
    conf = Utils.initializeYarnConfiguration();

    // intialize HDFS
    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem 
    // Create a local resource to point to the destination jar path 
    final FileSystem fs = FileSystem.get(conf);

    if (fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Create yarnClient
    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Query cluster for metrics
    if (cmd.hasOption(QUERY.getOpt())) {
        showClusterMetrics(yarnClient);
    }
    if (!cmd.hasOption(CONTAINER.getOpt())) {
        LOG.fatal("Missing required argument " + CONTAINER.getOpt());
        printUsage();
        yarnClient.stop();
        System.exit(1);
    }

    // TM Count
    final int taskManagerCount = Integer.valueOf(cmd.getOptionValue(CONTAINER.getOpt()));

    System.out.println("Using values:");
    System.out.println("\tContainer Count = " + taskManagerCount);
    System.out.println("\tJar Path = " + localJarPath.toUri().getPath());
    System.out.println("\tConfiguration file = " + confPath.toUri().getPath());
    System.out.println("\tJobManager memory = " + jmMemory);
    System.out.println("\tTaskManager memory = " + tmMemory);
    System.out.println("\tTaskManager cores = " + tmCores);

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    Resource maxRes = appResponse.getMaximumResourceCapability();
    if (tmMemory > maxRes.getMemory() || tmCores > maxRes.getVirtualCores()) {
        LOG.fatal("The cluster does not have the requested resources for the TaskManagers available!\n"
                + "Maximum Memory: " + maxRes.getMemory() + ", Maximum Cores: " + tmCores);
        yarnClient.stop();
        System.exit(1);
    }
    if (jmMemory > maxRes.getMemory()) {
        LOG.fatal("The cluster does not have the requested resources for the JobManager available!\n"
                + "Maximum Memory: " + maxRes.getMemory());
        yarnClient.stop();
        System.exit(1);
    }
    int totalMemoryRequired = jmMemory + tmMemory * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.fatal("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available.");
        yarnClient.stop();
        System.exit(1);
    }
    if (tmMemory > freeClusterMem.containerLimit) {
        LOG.fatal("The requested amount of memory for the TaskManagers (" + tmMemory + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit);
        yarnClient.stop();
        System.exit(1);
    }
    if (jmMemory > freeClusterMem.containerLimit) {
        LOG.fatal("The requested amount of memory for the JobManager (" + jmMemory + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit);
        yarnClient.stop();
        System.exit(1);
    }

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jmMemory) + "M " + javaOpts;
    if (hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/jobmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
    }
    amCommand += " org.apache.flink.yarn.ApplicationMaster" + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log";
    amContainer.setCommands(Collections.singletonList(amCommand));

    System.err.println("amCommand=" + amCommand);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), localJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), confPath, flinkConf,
            fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[3 + shipFiles.size()];
    StringBuffer envShipFileList = new StringBuffer();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[3 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[3 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    paths[2] = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    fs.setPermission(paths[2], permission); // set permission for path.
    Utils.setTokensFor(amContainer, paths, this.conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    Utils.setupEnv(conf, appMasterEnv);
    // set configuration values
    appMasterEnv.put(Client.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(Client.ENV_TM_CORES, String.valueOf(tmCores));
    appMasterEnv.put(Client.ENV_TM_MEMORY, String.valueOf(tmMemory));
    appMasterEnv.put(Client.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(Client.ENV_APP_ID, appId.toString());
    appMasterEnv.put(Client.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(Client.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(Client.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName());

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jmMemory);
    capability.setVirtualCores(1);

    appContext.setApplicationName("Flink"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue(queue);

    // file that we write into the conf/ dir containing the jobManager address.
    final File addrFile = new File(confDirPath + CliFrontend.JOBMANAGER_ADDRESS_FILE);

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                LOG.info("Killing the Flink-YARN application.");
                yarnClient.killApplication(appId);
                LOG.info("Deleting files in " + paths[2]);
                FileSystem shutFS = FileSystem.get(conf);
                shutFS.delete(paths[2], true); // delete conf and jar file.
                shutFS.close();
            } catch (Exception e) {
                LOG.warn("Exception while killing the YARN application", e);
            }
            try {
                addrFile.delete();
            } catch (Exception e) {
                LOG.warn("Exception while deleting the jobmanager address file", e);
            }
            LOG.info("YARN Client is shutting down");
            yarnClient.stop();
        }
    });

    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    boolean told = false;
    char[] el = { '/', '|', '\\', '-' };
    int i = 0;
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        if (!told && appState == YarnApplicationState.RUNNING) {
            System.err.println("Flink JobManager is now running on " + appReport.getHost() + ":" + jmPort);
            System.err.println("JobManager Web Interface: " + appReport.getTrackingUrl());
            // write jobmanager connect information

            PrintWriter out = new PrintWriter(addrFile);
            out.println(appReport.getHost() + ":" + jmPort);
            out.close();
            addrFile.setReadable(true, false); // readable for all.
            told = true;
        }
        if (!told) {
            System.err.print(el[i++] + "\r");
            if (i == el.length) {
                i = 0;
            }
            Thread.sleep(500); // wait for the application to switch to RUNNING
        } else {
            Thread.sleep(5000);
        }

        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    LOG.info("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());
    if (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) {
        LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
    }

}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *//*from w  ww. ja  va  2s  . c  o  m*/
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
                    "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy",
                    "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------

    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));

    Assert.assertTrue("Expect to see failed container",
            eC.contains("Container killed by the ApplicationMaster"));

    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);/*  ww w . ja  v a 2  s  . c  o  m*/
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
        tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
        tmpInFile = tmp.newFile();
        FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Runner runner = startWithArgs(
            new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt",
                    flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD",
                    "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
                    "-ytm", "1024", "-ys", "2", // test requesting slots from YARN.
                    "--yarndetached", job, "--input", tmpInFile.getAbsoluteFile().toString(), "--output",
                    tmpOutFolder.getAbsoluteFile().toString() },
            "Job has been submitted with JobID", RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
        }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

        ApplicationId tmpAppId;
        if (apps.size() == 1) {
            // Better method to find the right appId. But sometimes the app is shutting down very fast
            // Only one running
            tmpAppId = apps.get(0).getApplicationId();

            LOG.info("waiting for the job with appId {} to finish", tmpAppId);
            // wait until the app has finished
            while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
                sleep(500);
            }
        } else {
            // get appId by finding the latest finished appid
            apps = yc.getApplications();
            Collections.sort(apps, new Comparator<ApplicationReport>() {
                @Override
                public int compare(ApplicationReport o1, ApplicationReport o2) {
                    return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
                }
            });
            tmpAppId = apps.get(0).getApplicationId();
            LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
        }
        final ApplicationId id = tmpAppId;

        // now it has finished.
        // check the output files.
        File[] listOfOutputFiles = tmpOutFolder.listFiles();

        Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
        LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

        // read all output files in output folder to one output string
        String content = "";
        for (File f : listOfOutputFiles) {
            if (f.isFile()) {
                content += FileUtils.readFileToString(f) + "\n";
            }
        }
        //String content = FileUtils.readFileToString(taskmanagerOut);
        // check for some of the wordcount outputs.
        Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
                content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
        Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
                content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)"));

        // check if the heap size for the TaskManager was set correctly
        File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString());
            }
        });
        Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
        content = FileUtils.readFileToString(jobmanagerLog);
        // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
        String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m";
        Assert.assertTrue(
                "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
                content.contains(expected));
        expected = " (2/2) (attempt #0) to ";
        Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log."
                + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
                + jobmanagerLog + "'", content.contains(expected));

        // make sure the detached app is really finished.
        LOG.info("Checking again that app has finished");
        ApplicationReport rep;
        do {
            sleep(500);
            rep = yc.getApplicationReport(id);
            LOG.info("Got report {}", rep);
        } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
        LOG.warn("Error while detached yarn session was running", t);
        Assert.fail(t.getMessage());
    }
}

From source file:org.apache.flink.yarn.YARNSessionFIFOITCase.java

License:Apache License

/**
 * Test regular operation, including command line parameter parsing.
 *//*from   w w w  .j a  va2 s. co m*/
@Test(timeout = 60000) // timeout after a minute.
public void testDetachedMode() {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner = startWithArgs(new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t",
            flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024", "--name", "MyCustomName", // test setting a custom name
            "--detached" }, "Flink JobManager is now running on", RunTypes.YARN_SESSION);

    checkForLogString("The Flink YARN client has been started in detached mode");

    Assert.assertFalse("The runner should detach.", runner.isAlive());

    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
        sleep(500);
    }
    LOG.info("Two containers are running. Killing the application");

    // kill application "externally".
    try {
        YarnClient yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);

        Assert.assertEquals("MyCustomName", app.getName());
        ApplicationId id = app.getApplicationId();
        yc.killApplication(id);

        while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
            sleep(500);
        }
    } catch (Throwable t) {
        LOG.warn("Killing failed", t);
        Assert.fail();
    }

    LOG.info("Finished testDetachedMode()");
}

From source file:org.apache.hive.service.server.KillQueryImpl.java

License:Apache License

public static void killChildYarnJobs(Configuration conf, String tag) {
    try {//from   w  ww .java 2 s .  c o m
        if (tag == null) {
            return;
        }
        LOG.info("Killing yarn jobs using query tag:" + tag);
        Set<ApplicationId> childYarnJobs = getChildYarnJobs(conf, tag);
        if (!childYarnJobs.isEmpty()) {
            YarnClient yarnClient = YarnClient.createYarnClient();
            yarnClient.init(conf);
            yarnClient.start();
            for (ApplicationId app : childYarnJobs) {
                yarnClient.killApplication(app);
            }
        }
    } catch (IOException | YarnException ye) {
        LOG.warn("Exception occurred while killing child job({})", ye);
    }
}

From source file:org.apache.ignite.yarn.IgniteYarnClient.java

License:Apache License

/**
 * Main methods has one mandatory parameter and one optional parameter.
 *
 * @param args Path to jar mandatory parameter and property file is optional.
 *///  ww  w  .j a v  a  2s. co  m
public static void main(String[] args) throws Exception {
    checkArguments(args);

    // Set path to app master jar.
    String pathAppMasterJar = args[0];

    ClusterProperties props = ClusterProperties.from(args.length == 2 ? args[1] : null);

    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    FileSystem fs = FileSystem.get(conf);

    Path ignite;

    // Load ignite and jar
    if (props.ignitePath() == null)
        ignite = getIgnite(props, fs);
    else
        ignite = new Path(props.ignitePath());

    // Upload the jar file to HDFS.
    Path appJar = IgniteYarnUtils.copyLocalToHdfs(fs, pathAppMasterJar,
            props.igniteWorkDir() + File.separator + IgniteYarnUtils.JAR_NAME);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    amContainer.setCommands(Collections
            .singletonList(Environment.JAVA_HOME.$() + "/bin/java -Xmx512m " + ApplicationMaster.class.getName()
                    + IgniteYarnUtils.SPACE + ignite.toUri() + IgniteYarnUtils.YARN_LOG_OUT));

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = IgniteYarnUtils.setupFile(appJar, fs, LocalResourceType.FILE);

    amContainer.setLocalResources(Collections.singletonMap(IgniteYarnUtils.JAR_NAME, appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = props.toEnvs();

    setupAppMasterEnv(appMasterEnv, conf);

    amContainer.setEnvironment(appMasterEnv);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials creds = new Credentials();

        String tokRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);

        if (tokRenewer == null || tokRenewer.length() == 0)
            throw new IOException("Master Kerberos principal for the RM is not set.");

        log.info("Found RM principal: " + tokRenewer);

        final Token<?> tokens[] = fs.addDelegationTokens(tokRenewer, creds);

        if (tokens != null)
            log.info("File system delegation tokens: " + Arrays.toString(tokens));

        amContainer.setTokens(IgniteYarnUtils.createTokenBuffer(creds));
    }

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(512);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("ignition"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue

    // Submit application
    ApplicationId appId = appContext.getApplicationId();

    yarnClient.submitApplication(appContext);

    log.log(Level.INFO, "Submitted application. Application id: {0}", appId);

    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();

    while (appState == YarnApplicationState.NEW || appState == YarnApplicationState.NEW_SAVING
            || appState == YarnApplicationState.SUBMITTED || appState == YarnApplicationState.ACCEPTED) {
        TimeUnit.SECONDS.sleep(1L);

        appReport = yarnClient.getApplicationReport(appId);

        if (appState != YarnApplicationState.ACCEPTED
                && appReport.getYarnApplicationState() == YarnApplicationState.ACCEPTED)
            log.log(Level.INFO, "Application {0} is ACCEPTED.", appId);

        appState = appReport.getYarnApplicationState();
    }

    log.log(Level.INFO, "Application {0} is {1}.", new Object[] { appId, appState });
}

From source file:org.apache.metron.maas.service.MaasIntegrationTest.java

License:Apache License

public void testDSShell(boolean haveDomain) throws Exception {
    MaaSConfig config = new MaaSConfig() {
        {//from  w  w  w  .  j  ava2 s  .  c  om
            setServiceRoot("/maas/service");
            setQueueConfig(new HashMap<String, Object>() {
                {
                    put(ZKQueue.ZK_PATH, "/maas/queue");
                }
            });
        }
    };
    String configRoot = "/maas/config";
    byte[] configData = ConfigUtil.INSTANCE.toBytes(config);
    try {
        client.setData().forPath(configRoot, configData);
    } catch (KeeperException.NoNodeException e) {
        client.create().creatingParentsIfNeeded().forPath(configRoot, configData);
    }
    String[] args = { "--jar", yarnComponent.getAppMasterJar(), "--zk_quorum",
            zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--master_memory", "512",
            "--master_vcores", "2", };
    if (haveDomain) {
        String[] domainArgs = { "--domain", "TEST_DOMAIN", "--view_acls", "reader_user reader_group",
                "--modify_acls", "writer_user writer_group", "--create" };
        List<String> argsList = new ArrayList<String>(Arrays.asList(args));
        argsList.addAll(Arrays.asList(domainArgs));
        args = argsList.toArray(new String[argsList.size()]);
    }

    YarnConfiguration conf = yarnComponent.getConfig();
    LOG.info("Initializing DS Client");
    final Client client = new Client(new Configuration(conf));
    boolean initSuccess = client.init(args);
    Assert.assertTrue(initSuccess);
    LOG.info("Running DS Client");
    final AtomicBoolean result = new AtomicBoolean(false);
    Thread t = new Thread() {
        @Override
        public void run() {
            try {
                result.set(client.run());
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    };
    t.start();

    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new Configuration(conf));
    yarnClient.start();
    String hostName = NetUtils.getHostname();

    boolean verified = false;
    String errorMessage = "";
    while (!verified) {
        List<ApplicationReport> apps = yarnClient.getApplications();
        if (apps.size() == 0) {
            Thread.sleep(10);
            continue;
        }
        ApplicationReport appReport = apps.get(0);
        if (appReport.getHost().equals("N/A")) {
            Thread.sleep(10);
            continue;
        }
        errorMessage = "Expected host name to start with '" + hostName + "', was '" + appReport.getHost()
                + "'. Expected rpc port to be '-1', was '" + appReport.getRpcPort() + "'.";
        if (checkHostname(appReport.getHost()) && appReport.getRpcPort() == -1) {
            verified = true;
        }
        if (appReport.getYarnApplicationState() == YarnApplicationState.FINISHED) {
            break;
        }
    }
    Assert.assertTrue(errorMessage, verified);
    FileSystem fs = FileSystem.get(conf);
    try {
        new ModelSubmission().execute(FileSystem.get(conf),
                new String[] { "--name", "dummy", "--version", "1.0", "--zk_quorum",
                        zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--local_model_path",
                        "src/test/resources/maas", "--hdfs_model_path",
                        new Path(fs.getHomeDirectory(), "maas/dummy").toString(), "--num_instances", "1",
                        "--memory", "100", "--mode", "ADD", "--log4j", "src/test/resources/log4j.properties"

                });
        ServiceDiscoverer discoverer = new ServiceDiscoverer(this.client, config.getServiceRoot());
        discoverer.start();
        {
            boolean passed = false;
            for (int i = 0; i < 100; ++i) {
                try {
                    List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", "1.0"));
                    if (endpoints != null && endpoints.size() == 1) {
                        LOG.trace("Found endpoints: " + endpoints.get(0));
                        String output = makeRESTcall(
                                new URL(endpoints.get(0).getEndpoint().getUrl() + "/echo/casey"));
                        if (output.contains("casey")) {
                            passed = true;
                            break;
                        }
                    }
                } catch (Exception e) {
                }
                Thread.sleep(2000);
            }
            Assert.assertTrue(passed);
        }

        {
            List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", "1.0"));
            Assert.assertNotNull(endpoints);
            Assert.assertEquals(1, endpoints.size());
        }
        new ModelSubmission().execute(FileSystem.get(conf),
                new String[] { "--name", "dummy", "--version", "1.0", "--zk_quorum",
                        zkServerComponent.getConnectionString(), "--zk_root", configRoot, "--num_instances",
                        "1", "--mode", "REMOVE",

                });
        {
            boolean passed = false;
            for (int i = 0; i < 100; ++i) {
                try {
                    List<ModelEndpoint> endpoints = discoverer.getEndpoints(new Model("dummy", "1.0"));
                    //ensure that the endpoint is dead.
                    if (endpoints == null || endpoints.size() == 0) {
                        passed = true;
                        break;
                    }
                } catch (Exception e) {
                }
                Thread.sleep(2000);
            }
            Assert.assertTrue(passed);
        }
    } finally {
        cleanup();
    }
}

From source file:org.apache.oozie.action.hadoop.LauncherMainHadoopUtils.java

License:Apache License

public static void killChildYarnJobs(Configuration actionConf) {
    try {/*  www . j av  a2  s  .c o  m*/
        Set<ApplicationId> childYarnJobs = getChildYarnJobs(actionConf);
        if (!childYarnJobs.isEmpty()) {
            System.out.println();
            System.out.println("Found [" + childYarnJobs.size() + "] Map-Reduce jobs from this launcher");
            System.out.println("Killing existing jobs and starting over:");
            YarnClient yarnClient = YarnClient.createYarnClient();
            yarnClient.init(actionConf);
            yarnClient.start();
            for (ApplicationId app : childYarnJobs) {
                System.out.print("Killing job [" + app + "] ... ");
                yarnClient.killApplication(app);
                System.out.println("Done");
            }
            System.out.println();
        }
    } catch (YarnException ye) {
        throw new RuntimeException("Exception occurred while killing child job(s)", ye);
    } catch (IOException ioe) {
        throw new RuntimeException("Exception occurred while killing child job(s)", ioe);
    }
}

From source file:org.apache.reef.runtime.yarn.driver.unmanaged.UnmanagedAmTest.java

License:Apache License

@Test
public void testAmShutdown() throws IOException, YarnException {

    Assume.assumeTrue("This test requires a YARN Resource Manager to connect to",
            Boolean.parseBoolean(System.getenv("REEF_TEST_YARN")));

    final YarnConfiguration yarnConfig = new YarnConfiguration();

    // Start YARN client and register the application

    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(yarnConfig);//from  ww w.j  a  v a  2 s.  c o  m
    yarnClient.start();

    final ContainerLaunchContext containerContext = Records.newRecord(ContainerLaunchContext.class);
    containerContext.setCommands(Collections.<String>emptyList());
    containerContext.setLocalResources(Collections.<String, LocalResource>emptyMap());
    containerContext.setEnvironment(Collections.<String, String>emptyMap());
    containerContext.setTokens(getTokens());

    final ApplicationSubmissionContext appContext = yarnClient.createApplication()
            .getApplicationSubmissionContext();
    appContext.setApplicationName("REEF_Unmanaged_AM_Test");
    appContext.setAMContainerSpec(containerContext);
    appContext.setUnmanagedAM(true);
    appContext.setQueue("default");

    final ApplicationId applicationId = appContext.getApplicationId();
    LOG.log(Level.INFO, "Registered YARN application: {0}", applicationId);

    yarnClient.submitApplication(appContext);

    LOG.log(Level.INFO, "YARN application submitted: {0}", applicationId);

    addToken(yarnClient.getAMRMToken(applicationId));

    // Start the AM

    final AMRMClientAsync<AMRMClient.ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(1000,
            this);
    rmClient.init(yarnConfig);
    rmClient.start();

    final NMClientAsync nmClient = new NMClientAsyncImpl(this);
    nmClient.init(yarnConfig);
    nmClient.start();

    final RegisterApplicationMasterResponse registration = rmClient
            .registerApplicationMaster(NetUtils.getHostname(), -1, null);

    LOG.log(Level.INFO, "Unmanaged AM is running: {0}", registration);

    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "Success!", null);

    LOG.log(Level.INFO, "Unregistering AM: state {0}", rmClient.getServiceState());

    // Shutdown the AM

    rmClient.stop();
    nmClient.stop();

    // Get the final application report

    final ApplicationReport appReport = yarnClient.getApplicationReport(applicationId);
    final YarnApplicationState appState = appReport.getYarnApplicationState();
    final FinalApplicationStatus finalAttemptStatus = appReport.getFinalApplicationStatus();

    LOG.log(Level.INFO, "Application {0} final attempt {1} status: {2}/{3}", new Object[] { applicationId,
            appReport.getCurrentApplicationAttemptId(), appState, finalAttemptStatus });

    Assert.assertEquals("Application must be in FINISHED state", YarnApplicationState.FINISHED, appState);
    Assert.assertEquals("Final status must be SUCCEEDED", FinalApplicationStatus.SUCCEEDED, finalAttemptStatus);

    // Shutdown YARN client

    yarnClient.stop();
}