Example usage for org.apache.hadoop.yarn.server.nodemanager NodeManager getNMContext

List of usage examples for org.apache.hadoop.yarn.server.nodemanager NodeManager getNMContext

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.server.nodemanager NodeManager getNMContext.

Prototype

@VisibleForTesting
    public Context getNMContext() 

Source Link

Usage

From source file:com.splicemachine.test.SpliceTestYarnPlatform.java

License:Apache License

private static void waitForNMToRegister(NodeManager nm) throws Exception {
    int attempt = 60;
    ContainerManagerImpl cm = ((ContainerManagerImpl) nm.getNMContext().getContainerManager());
    while (cm.getBlockNewContainerRequestsStatus() && attempt-- > 0) {
        Thread.sleep(2000);//  ww w .  j a va 2 s.c  o m
    }
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *//* w w  w  . j  a v  a  2 s .c o m*/
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
                    "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy",
                    "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------

    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));

    Assert.assertTrue("Expect to see failed container",
            eC.contains("Container killed by the ApplicationMaster"));

    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}

From source file:org.apache.flink.yarn.YarnTestBase.java

License:Apache License

public static int getRunningContainers() {
    int count = 0;
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        count += containers.size();//  w ww .  ja v  a2 s .  c o  m
    }
    return count;
}