Example usage for org.apache.hadoop.yarn.api.records NodeReport getUsed

List of usage examples for org.apache.hadoop.yarn.api.records NodeReport getUsed

Introduction

This page collects example usages of the getUsed method of org.apache.hadoop.yarn.api.records.NodeReport.

Prototype

/**
 * Get used <code>Resource</code> on the node.
 *
 * @return the used <code>Resource</code> on the node
 */
@Public
@Stable
public abstract Resource getUsed();

Source Link

Document

Get used Resource on the node.

Usage

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * Summarises the memory that is not in use on the NodeManagers reported as RUNNING.
 *
 * <p>For every node the free amount is its capability memory minus its used memory; a node
 * whose report carries no usage information is treated as using nothing.
 *
 * @param yarnClient client the node reports are fetched from
 * @return description built from the summed free memory, the largest free amount found on a
 *         single node, and the per-node free amounts in report order
 * @throws YarnException propagated from {@code yarnClient.getNodeReports}
 * @throws IOException propagated from {@code yarnClient.getNodeReports}
 */
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient)
        throws YarnException, IOException {
    List<NodeReport> runningNodes = yarnClient.getNodeReports(NodeState.RUNNING);

    int[] freePerNode = new int[runningNodes.size()];
    int freeSum = 0;
    int largestFree = 0;

    int slot = 0;
    for (NodeReport report : runningNodes) {
        int capacity = report.getCapability().getMemory();

        // A report without usage data counts as an idle node.
        int inUse = 0;
        if (report.getUsed() != null) {
            inUse = report.getUsed().getMemory();
        }

        int available = capacity - inUse;
        freePerNode[slot++] = available;
        freeSum += available;
        largestFree = Math.max(largestFree, available);
    }

    return new ClusterResourceDescription(freeSum, largestFree, freePerNode);
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>Steps, in order: start a YARN session and wait for one connected TaskManager with three
 * slots, check the JobManager web interface, assert the vcores reported as used by the running
 * nodes, stop the TaskManager container through its NodeManager, wait until the captured stderr
 * shows a killed container followed by a newly launched TaskManager, then stop the session and
 * assert on the captured output.
 */
@Test(timeout = 100000) // timeout after 100 seconds
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(
            new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n",
                    "1", "-jm", "768", "-tm", "1024", "-s", "3", // set the slots 3 to check if the vCores are set properly!
                    "-nm", "customName", "-Dfancy-configuration-value=veryFancy",
                    "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" },
            "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);

    Assert.assertEquals(2, getRunningContainers());

    // ------------------------ Test if JobManager web interface is accessible -------

    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();

        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        Assert.assertEquals(1, apps.size()); // Only one running
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // normalize the tracking URL so that relative paths can simply be appended
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);

        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");

        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());

        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);

        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));

        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // the loop keeps the LAST match found in the captured stdout
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);

        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port,
                parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));

        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        // any failure above (including assertion errors, which are Throwables) fails the test here
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    }

    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------

    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    // scan the containers of every NodeManager for the one whose launch command mentions YarnTaskManager
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }

    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);

    try {
        List<NodeReport> nodeReports = yc.getNodeReports(NodeState.RUNNING);

        // we asked for one node with 2 vcores so we expect 2 vcores
        int userVcores = 0;
        for (NodeReport rep : nodeReports) {
            userVcores += rep.getUsed().getVirtualCores();
        }
        Assert.assertEquals(2, userVcores);
    } catch (Exception e) {
        Assert.fail("Test failed: " + e.getMessage());
    }

    // NOTE(review): stop() is not in a finally block, so the client is not stopped when an
    // assertion above fails.
    yc.stop();

    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);

    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }

    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    // (the loop itself has no upper bound; only the @Test timeout ends it if the messages never show up)
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());

        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // only look at the output written after the kill message
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);

    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
    }
    LOG.warn("stopped");

    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);

    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));

    Assert.assertTrue("Expect to see failed container",
            eC.contains("Container killed by the ApplicationMaster"));

    Assert.assertTrue("Expect to see new container started",
            eC.contains("Launching TaskManager") && eC.contains("on host"));

    // cleanup auth for the subsequent tests.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);

    LOG.info("Finished testTaskManagerFailure()");
}

From source file:org.apache.sysml.yarn.ropt.YarnClusterAnalyzer.java

License:Apache License

/**
 * Computes the current utilization of the cluster from the node reports of the YARN client.
 *
 * <p>Used and total memory and vcores are summed over all node reports. The result is the
 * larger of the two ratios {@code used / total}, each capped at 1. A node whose report
 * carries no usage information contributes nothing to the used totals, and a dimension
 * whose total capacity is zero (for example when no nodes are reported) counts as 0
 * instead of producing {@code NaN}.
 *
 * <p>The shared client is created on first use.
 *
 * @return utilization between 0 and 1
 * @throws IOException wrapping any exception raised while creating the client or
 *         fetching and reading the node reports
 */
public static double getClusterUtilization() throws IOException {
    double util = 0;

    try {
        if (_client == null)
            _client = createYarnClient();
        List<NodeReport> nodesReport = _client.getNodeReports();

        double maxMem = 0;
        double currMem = 0;
        long maxCores = 0;
        long currCores = 0;
        for (NodeReport node : nodesReport) {
            Resource max = node.getCapability();
            Resource used = node.getUsed();
            maxMem += max.getMemory();
            maxCores += max.getVirtualCores();
            if (used != null) { //report without usage info: nothing in use
                currMem += used.getMemory();
                currCores += used.getVirtualCores();
            }
        }

        //guard the divisions: 0/0 would yield NaN and poison the final max
        double memUtil = (maxMem > 0) ? Math.min(1, currMem / maxMem) : 0; //memory util
        double coreUtil = (maxCores > 0) ? Math.min(1, (double) currCores / maxCores) : 0; //vcore util
        util = Math.max(memUtil, coreUtil);
    } catch (Exception ex) {
        throw new IOException(ex);
    }

    return util;
}

From source file:org.apache.sysml.yarn.ropt.YarnClusterAnalyzer.java

License:Apache License

/**
 * Analyzes properties of Yarn cluster and Hadoop configurations.
 * /*from   w  w  w.  jav a  2  s .  c  o m*/
 * @param yarnClient hadoop yarn client
 * @param conf hadoop yarn configuration
 * @param verbose output info to standard output
 */
public static void analyzeYarnCluster(YarnClient yarnClient, YarnConfiguration conf, boolean verbose) {
    try {
        List<NodeReport> nodesReport = yarnClient.getNodeReports();
        if (verbose)
            System.out.println("There are " + nodesReport.size() + " nodes in the cluster");
        if (nodesReport.isEmpty())
            throw new YarnException("There are zero available nodes in the yarn cluster");

        nodesMaxPhySorted = new ArrayList<>(nodesReport.size());
        clusterTotalMem = 0;
        clusterTotalCores = 0;
        clusterTotalNodes = 0;
        minimumMRContainerPhyMB = -1;
        for (NodeReport node : nodesReport) {
            Resource resource = node.getCapability();
            Resource used = node.getUsed();
            if (used == null)
                used = Resource.newInstance(0, 0);
            int mb = resource.getMemory();
            int cores = resource.getVirtualCores();
            if (mb <= 0)
                throw new YarnException("A node has non-positive memory " + mb);

            int myMinMRPhyMB = mb / cores / CPU_HYPER_FACTOR;
            if (minimumMRContainerPhyMB < myMinMRPhyMB)
                minimumMRContainerPhyMB = myMinMRPhyMB; // minimumMRContainerPhyMB needs to be the largest among the mins

            clusterTotalMem += (long) mb * 1024 * 1024;
            nodesMaxPhySorted.add((long) mb * 1024 * 1024);
            clusterTotalCores += cores;
            clusterTotalNodes++;
            if (verbose)
                System.out.println("\t" + node.getNodeId() + " has " + mb + " MB (" + used.getMemory()
                        + " MB used) memory and " + resource.getVirtualCores() + " (" + used.getVirtualCores()
                        + " used) cores");

        }
        Collections.sort(nodesMaxPhySorted, Collections.reverseOrder());

        nodesMaxBudgetSorted = new ArrayList<>(nodesMaxPhySorted.size());
        for (int i = 0; i < nodesMaxPhySorted.size(); i++)
            nodesMaxBudgetSorted.add(ResourceOptimizer.phyToBudget(nodesMaxPhySorted.get(i)));

        _remotePar = nodesReport.size();
        if (_remotePar == 0)
            throw new YarnException("There are no available nodes in the yarn cluster");

        // Now get the default cluster settings
        _remoteMRSortMem = (1024 * 1024) * conf.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB, 100); //100MB

        //handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
        //(for robustness we probe both: child and map configuration parameters)
        String javaOpts1 = conf.get(MRConfigurationNames.MR_CHILD_JAVA_OPTS); //internally mapred/mapreduce synonym
        String javaOpts2 = conf.get(MRConfigurationNames.MR_MAP_JAVA_OPTS, null); //internally mapred/mapreduce synonym
        String javaOpts3 = conf.get(MRConfigurationNames.MR_REDUCE_JAVA_OPTS, null); //internally mapred/mapreduce synonym
        if (javaOpts2 != null) //specific value overrides generic
            _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts2);
        else
            _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts1);
        if (javaOpts3 != null) //specific value overrides generic
            _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts3);
        else
            _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);

        //HDFS blocksize
        String blocksize = conf.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
        _blocksize = Long.parseLong(blocksize);

        minimalPhyAllocate = (long) 1024 * 1024
                * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
                        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
        maximumPhyAllocate = (long) 1024 * 1024
                * conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
                        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
        mrAMPhy = (long) conf.getInt(MRConfigurationNames.YARN_APP_MR_AM_RESOURCE_MB, 1536) * 1024 * 1024;

    } catch (Exception e) {
        throw new RuntimeException("Unable to analyze yarn cluster ", e);
    }

    /*
     * This is for AppMaster to query available resource in the cluster during heartbeat 
     * 
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(conf);
    rmClient.start();
    AllocateResponse response = rmClient.allocate(0);
    int nodeCount = response.getNumClusterNodes();
    Resource resource = response.getAvailableResources();
    List<NodeReport> nodeUpdate = response.getUpdatedNodes();
            
    LOG.info("This is a " + nodeCount + " node cluster with totally " +
    resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores");
    LOG.info(nodereport.size() + " updatedNode reports received");
    for (NodeReport node : nodeUpdate) {
       resource = node.getCapability();
       LOG.info(node.getNodeId() + " updated with " + resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores");
    }*/
}

From source file:org.apache.twill.yarn.PlacementPolicyTestRun.java

License:Apache License

/**
 * Runs once before the tests of this class: prepares the shared resource specifications and
 * checks the number and the free capacity of the node managers the tests depend on.
 */
@BeforeClass
public static void verifyClusterCapability() throws InterruptedException {
    // Older Hadoop versions do not support blacklists, so the whole class is skipped there.
    Assume.assumeTrue(YarnUtils.getHadoopVersion().equals(YarnUtils.HadoopVersions.HADOOP_22));

    // Every runnable of this test class shares one resource specification, for convenience.
    resource = ResourceSpecification.Builder.with()
            .setVirtualCores(RUNNABLE_CORES)
            .setMemory(RUNNABLE_MEMORY, ResourceSpecification.SizeUnit.MEGA)
            .build();
    twoInstancesResource = ResourceSpecification.Builder.with()
            .setVirtualCores(RUNNABLE_CORES)
            .setMemory(RUNNABLE_MEMORY, ResourceSpecification.SizeUnit.MEGA)
            .setInstances(2)
            .build();

    // Exactly three NodeManagers are needed; poll up to 20 times, one second apart.
    for (int attempt = 0; attempt < 20; attempt++) {
        try {
            nodeReports = TWILL_TESTER.getNodeReports();
            if (nodeReports != null && nodeReports.size() == 3) {
                break;
            }
        } catch (Exception e) {
            LOG.error("Failed to get node reports", e);
        }
        LOG.warn("NodeManagers != 3. {}", nodeReports);
        TimeUnit.SECONDS.sleep(1);
    }

    // Each NodeManager must have room left for at least two runnables.
    for (NodeReport report : nodeReports) {
        Resource total = report.getCapability();
        Resource inUse = report.getUsed();
        Assert.assertNotNull(total);

        // A report without usage information is treated as a completely free node.
        int available = (inUse == null) ? total.getMemory() : total.getMemory() - inUse.getMemory();
        Assert.assertTrue(2 * resource.getMemorySize() < available);
    }
}

From source file:org.apache.twill.yarn.PlacementPolicyTestRun.java

License:Apache License

/**
 * Helper for verifying DISTRIBUTED placement policies: counts the NodeManagers whose node
 * report shows a positive amount of used memory.
 *
 * @return number of NodeManagers on which runnables got provisioned.
 */
private int getProvisionedNodeManagerCount() throws Exception {
    int nodesInUse = 0;
    for (NodeReport report : getNodeReports()) {
        Resource usage = report.getUsed();
        // Skip nodes that report no usage at all or no memory in use.
        if (usage == null || usage.getMemory() <= 0) {
            continue;
        }
        nodesInUse++;
    }
    return nodesInUse;
}

From source file:org.huahinframework.manager.rest.service.ApplicationService.java

License:Apache License

/**
 * Returns the number of node managers and one JSON entry per node report of the cluster.
 *
 * <p>If anything fails while querying the applications manager or building the JSON, the
 * failure is logged and a JSON object holding only the status message is returned instead.
 *
 * @return cluster description, or an object with the {@code Response.STATUS} entry on failure
 */
@Path("/cluster")
@GET
@Produces(MediaType.APPLICATION_JSON)
public JSONObject getCluster() {
    JSONObject jsonObject = new JSONObject();

    try {
        GetClusterMetricsRequest metricsRequest = recordFactory
                .newRecordInstance(GetClusterMetricsRequest.class);
        GetClusterMetricsResponse metricsResponse = applicationsManager.getClusterMetrics(metricsRequest);

        jsonObject.put(Response.NUM_NODE_MANAGERS, metricsResponse.getClusterMetrics().getNumNodeManagers());

        GetClusterNodesRequest nodeRequest = recordFactory.newRecordInstance(GetClusterNodesRequest.class);
        GetClusterNodesResponse nodeResponse = applicationsManager.getClusterNodes(nodeRequest);

        List<JSONObject> reports = new ArrayList<JSONObject>();
        for (NodeReport report : nodeResponse.getNodeReports()) {
            JSONObject nr = new JSONObject();
            nr.put(Response.HTTP_ADDRESS, report.getHttpAddress());
            nr.put(Response.NUM_CONTAINERS, report.getNumContainers());
            nr.put(Response.RACK_NAME, report.getRackName());
            nr.put(Response.CAPABILITY, report.getCapability().getMemory());
            nr.put(Response.HEALTH_REPORT, report.getNodeHealthStatus().getHealthReport());
            nr.put(Response.IS_NODE_HEALTHY, report.getNodeHealthStatus().getIsNodeHealthy());
            nr.put(Response.LAST_HEALTH_REPORT_TIME,
                    new Date(report.getNodeHealthStatus().getLastHealthReportTime()));
            nr.put(Response.NODE_ID, report.getNodeId());
            nr.put(Response.NODE_STATE, report.getNodeState());
            nr.put(Response.USED, report.getUsed());
            reports.add(nr);
        }

        jsonObject.put(Response.NODES, reports);
    } catch (Exception e) {
        // Log through the logger only, and pass the throwable so the stack trace is kept.
        log.error("Failed to collect cluster information", e);
        // Some exceptions carry no message; fall back to the exception's string form.
        String message = (e.getMessage() != null) ? e.getMessage() : e.toString();
        Map<String, String> status = new HashMap<String, String>();
        status.put(Response.STATUS, message);
        jsonObject = new JSONObject(status);
    }

    return jsonObject;
}

From source file:runtime.starter.MPJYarnClient.java

License:Open Source License

/**
 * Submits the MPJ application master to YARN and relays the output of the started processes.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>read {@code MPJ_HOME}; print a message and return if it is not set,</li>
 * <li>upload the application master jar, the wrapper jar and the user jar to the HDFS home
 *     directory,</li>
 * <li>start a YARN client, open the server sockets and submit the application master with the
 *     configured memory/vcores (capped at the cluster maximum),</li>
 * <li>accept {@code np + 1} connections whose output is read by IO threads, collect the port
 *     and rank information of the {@code np} wrappers and send the combined peer information
 *     back to each of them,</li>
 * <li>wait for the IO threads, poll the application report until the application is
 *     FINISHED, KILLED or FAILED, delete the uploaded jars and print the statistics.</li>
 * </ol>
 *
 * @throws Exception if an HDFS or YARN call outside the locally handled sections fails
 */
public void run() throws Exception {

    mpjHomeDir = System.getenv("MPJ_HOME");
    if (mpjHomeDir == null) {
        // Without the MPJ installation directory there is nothing to upload.
        System.out.println("[MPJRun.java]:MPJ_HOME environment variable not found..");
        return;
    }

    // Copy the application master jar to HDFS
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    /*
          Path dataset = new Path(fs.getHomeDirectory(),"/dataset");
          FileStatus datasetFile = fs.getFileStatus(dataset);
                 
          BlockLocation myBlocks [] = fs.getFileBlockLocations(datasetFile,0,datasetFile.getLen());
          for(BlockLocation b : myBlocks){
            System.out.println("\n--------------------");
            System.out.println("Length "+b.getLength());
            for(String host : b.getHosts()){
              System.out.println("host "+host);
            }
          }
    */
    Path source = new Path(mpjHomeDir + "/lib/mpj-app-master.jar");
    String pathSuffix = hdfsFolder + "mpj-app-master.jar";
    Path dest = new Path(fs.getHomeDirectory(), pathSuffix);

    if (debugYarn) {
        logger.info("Uploading mpj-app-master.jar to: " + dest.toString());
    }

    fs.copyFromLocalFile(false, true, source, dest);
    FileStatus destStatus = fs.getFileStatus(dest);

    Path wrapperSource = new Path(mpjHomeDir + "/lib/mpj-yarn-wrapper.jar");
    String wrapperSuffix = hdfsFolder + "mpj-yarn-wrapper.jar";
    Path wrapperDest = new Path(fs.getHomeDirectory(), wrapperSuffix);

    if (debugYarn) {
        logger.info("Uploading mpj-yarn-wrapper.jar to: " + wrapperDest.toString());
    }

    fs.copyFromLocalFile(false, true, wrapperSource, wrapperDest);

    Path userJar = new Path(jarPath);
    String userJarSuffix = hdfsFolder + "user-code.jar";
    Path userJarDest = new Path(fs.getHomeDirectory(), userJarSuffix);

    if (debugYarn) {
        logger.info("Uploading user-code.jar to: " + userJarDest.toString());
    }

    fs.copyFromLocalFile(false, true, userJar, userJarDest);

    // From here on this local variable shadows the conf that was used for the FileSystem above.
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    if (debugYarn) {
        YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();
        logger.info("\nNodes Information");
        logger.info("Number of NM: " + metrics.getNumNodeManagers() + "\n");

        List<NodeReport> nodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        for (NodeReport n : nodeReports) {
            logger.info("NodeId: " + n.getNodeId());
            logger.info("RackName: " + n.getRackName());
            logger.info("Total Memory: " + n.getCapability().getMemory());
            logger.info("Used Memory: " + n.getUsed().getMemory());
            logger.info("Total vCores: " + n.getCapability().getVirtualCores());
            logger.info("Used vCores: " + n.getUsed().getVirtualCores() + "\n");
        }
    }

    logger.info("Creating server socket at HOST " + serverName + " PORT " + serverPort + " \nWaiting for " + np
            + " processes to connect...");

    // Creating a server socket for incoming connections
    // NOTE(review): a failure here is only printed; the accept loops below then fail on every
    // iteration. Consider failing fast instead.
    try {
        servSock = new ServerSocket(serverPort);
        infoSock = new ServerSocket();
        TEMP_PORT = findPort(infoSock);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    int maxMem = appResponse.getMaximumResourceCapability().getMemory();

    if (debugYarn) {
        logger.info("Max memory capability resources in cluster: " + maxMem);
    }

    if (amMem > maxMem) {
        amMem = maxMem;
        logger.info("AM memory specified above threshold of cluster "
                + "Using maximum memory for AM container: " + amMem);
    }
    int maxVcores = appResponse.getMaximumResourceCapability().getVirtualCores();

    if (debugYarn) {
        logger.info("Max vCores capability resources in cluster: " + maxVcores);
    }

    if (amCores > maxVcores) {
        amCores = maxVcores;
        logger.info("AM virtual cores specified above threshold of cluster "
                + "Using maximum virtual cores for AM container: " + amCores);
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    List<String> commands = new ArrayList<String>();
    commands.add("$JAVA_HOME/bin/java");
    commands.add("-Xmx" + amMem + "m");
    commands.add("runtime.starter.MPJAppMaster");
    commands.add("--np");
    commands.add(String.valueOf(np));
    commands.add("--serverName");
    commands.add(serverName); //server name
    commands.add("--ioServerPort");
    commands.add(Integer.toString(serverPort)); //server port
    commands.add("--deviceName");
    commands.add(deviceName); //device name
    commands.add("--className");
    commands.add(className); //class name
    commands.add("--wdir");
    commands.add(workingDirectory); //wdir
    commands.add("--psl");
    commands.add(Integer.toString(psl)); //protocol switch limit
    commands.add("--wireUpPort");
    commands.add(String.valueOf(TEMP_PORT)); //for sharing ports & rank
    commands.add("--wrapperPath");
    commands.add(wrapperDest.toString());//MPJYarnWrapper.jar HDFS path
    commands.add("--userJarPath");
    commands.add(userJarDest.toString());//User Jar File HDFS path
    commands.add("--mpjContainerPriority");
    commands.add(mpjContainerPriority);// priority for mpj containers 
    commands.add("--containerMem");
    commands.add(containerMem);
    commands.add("--containerCores");
    commands.add(containerCores);

    if (debugYarn) {
        commands.add("--debugYarn");
    }

    if (appArgs != null) {

        commands.add("--appArgs");

        for (int i = 0; i < appArgs.length; i++) {
            commands.add(appArgs[i]);
        }
    }

    amContainer.setCommands(commands); //set commands

    // Setup local Resource for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);

    appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(dest));
    appMasterJar.setSize(destStatus.getLen());
    appMasterJar.setTimestamp(destStatus.getModificationTime());
    appMasterJar.setType(LocalResourceType.ARCHIVE);
    appMasterJar.setVisibility(LocalResourceVisibility.APPLICATION);

    amContainer.setLocalResources(Collections.singletonMap("mpj-app-master.jar", appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    // Setting up the environment
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    setupAppMasterEnv(appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMem);
    capability.setVirtualCores(amCores);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();

    appContext.setApplicationName(appName);
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue(yarnQueue); // queue

    Priority priority = Priority.newInstance(amPriority);
    appContext.setPriority(priority);

    ApplicationId appId = appContext.getApplicationId();

    //Adding ShutDown Hook
    Runtime.getRuntime().addShutdownHook(new KillYarnApp(appId, yarnClient));

    // Submit application
    System.out.println("Submitting Application: " + appContext.getApplicationName() + "\n");

    try {
        isRunning = true;
        yarnClient.submitApplication(appContext);
    } catch (Exception exp) {
        System.err.println("Error Submitting Application");
        exp.printStackTrace();
    }

    // np = number of processes , + 1 for Application Master container
    IOMessagesThread[] ioThreads = new IOMessagesThread[np + 1];

    peers = new String[np];
    socketList = new Vector<Socket>();
    int wport = 0;
    int rport = 0;
    int rank = 0;

    // np + 1 IOThreads
    for (int i = 0; i < (np + 1); i++) {
        try {
            sock = servSock.accept();

            //start IO thread to read STDOUT and STDERR from wrappers
            IOMessagesThread io = new IOMessagesThread(sock);
            ioThreads[i] = io;
            ioThreads[i].start();
        } catch (Exception e) {
            System.err.println("Error accepting connection from peer socket..");
            e.printStackTrace();
        }
    }

    // Loop to read port numbers from Wrapper.java processes
    // and to create WRAPPER_INFO (containing all IPs and ports)
    String WRAPPER_INFO = "#Peer Information";
    for (int i = np; i > 0; i--) {
        try {
            sock = infoSock.accept();

            DataInputStream in = new DataInputStream(sock.getInputStream());
            if (in.readUTF().startsWith("Sending Info")) {
                wport = in.readInt();
                rport = in.readInt();
                rank = in.readInt();
                peers[rank] = ";" + sock.getInetAddress().getHostAddress() + "@" + rport + "@" + wport + "@"
                        + rank;
                socketList.add(sock);
            }
        } catch (Exception e) {
            System.err.println("[MPJYarnClient.java]: Error accepting" + " connection from peer socket!");
            e.printStackTrace();
        }
    }

    for (int i = 0; i < np; i++) {
        WRAPPER_INFO += peers[i];
    }
    // Loop to broadcast WRAPPER_INFO to all Wrappers
    for (int i = np; i > 0; i--) {
        try {
            sock = socketList.get(np - i);
            DataOutputStream out = new DataOutputStream(sock.getOutputStream());

            out.writeUTF(WRAPPER_INFO);
            out.flush();

            sock.close();
        } catch (Exception e) {
            System.err.println("[MPJYarnClient.java]: Error closing" + " connection from peer socket..");
            e.printStackTrace();
        }
    }

    try {
        infoSock.close();
    } catch (IOException exp) {
        exp.printStackTrace();
    }

    // wait for all IO Threads to complete 
    for (int i = 0; i < (np + 1); i++) {
        // a slot stays null when accepting that connection failed above
        if (ioThreads[i] != null) {
            ioThreads[i].join();
        }
    }
    isRunning = true;

    System.out.println("\nApplication Statistics!");
    // Poll the application report until YARN reports a terminal state.
    while (true) {
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
        fStatus = appReport.getFinalApplicationStatus();
        if (appState == YarnApplicationState.FINISHED) {
            isRunning = false;
            System.out.println("State: " + fStatus);
            break;
        } else if (appState == YarnApplicationState.KILLED) {
            isRunning = false;
            System.out.println("State: " + appState);
            break;
        } else if (appState == YarnApplicationState.FAILED) {
            isRunning = false;
            System.out.println("State: " + appState);
            break;
        }
        Thread.sleep(100);
    }

    try {

        if (debugYarn) {
            logger.info("Cleaning the files from hdfs: ");
            logger.info("1) " + dest.toString());
            logger.info("2) " + wrapperDest.toString());
            logger.info("3) " + userJarDest.toString());
        }

        fs.delete(dest);
        fs.delete(wrapperDest);
        fs.delete(userJarDest);
    } catch (IOException exp) {
        exp.printStackTrace();
    }
    System.out.println("Application ID: " + appId + "\n" + "Application User: " + appReport.getUser() + "\n"
            + "RM Queue: " + appReport.getQueue() + "\n" + "Start Time: " + appReport.getStartTime() + "\n"
            + "Finish Time: " + appReport.getFinishTime());
}

From source file:uk.ac.gla.terrier.probos.controller.ControllerServer.java

License:Open Source License

/**
 * Builds one status entry per node report of the YARN client.
 *
 * <p>The container reports of all active containers are used first to record, per host name,
 * the ids of the jobs that have a container on that host. A node is marked "busy" when its
 * number of containers is at least its number of virtual cores, otherwise "free". The status
 * text lists the node's capability and used resources; a missing value is rendered as
 * {@code null} instead of raising a NullPointerException.
 *
 * @return status entries in the order of the node reports
 * @throws Exception if the node reports cannot be fetched; a failure to fetch a single
 *         container report surfaces as a RuntimeException wrapping the cause
 */
@Override
public PBSNodeStatus[] getNodesStatus() throws Exception {

    //first use the container reports of all running jobs to get a picture of the hosts in use
    //for each job
    TIntObjectHashMap<List<ContainerId>> job2con = getAllActiveContainers();
    final Map<String, TIntArrayList> node2job = new HashMap<String, TIntArrayList>();
    job2con.forEachEntry(new TIntObjectProcedure<List<ContainerId>>() {
        @Override
        public boolean execute(int jobId, List<ContainerId> containerList) {
            for (ContainerId cid : containerList) {
                try {
                    ContainerReport cr = yClient.getContainerReport(cid);
                    String hostname = cr.getAssignedNode().getHost();

                    TIntArrayList jobs = node2job.get(hostname);
                    if (jobs == null)
                        node2job.put(hostname, jobs = new TIntArrayList());
                    jobs.add(jobId);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            //returning true keeps the iteration over the remaining jobs going
            return true;
        }
    });

    List<NodeReport> nodeReports = yClient.getNodeReports();

    PBSNodeStatus[] rtr = new PBSNodeStatus[nodeReports.size()];
    for (int i = 0; i < rtr.length; i++) {
        final NodeReport node = nodeReports.get(i);
        String hostname = node.getNodeId().getHost();
        String yarnState = node.getNodeState().toString();

        String rack = node.getRackName();
        String tracker = node.getHttpAddress();
        int numContainers = node.getNumContainers();
        int numProcs = node.getCapability().getVirtualCores();
        TIntArrayList jobList = node2job.get(hostname);
        int[] jobs;
        if (jobList == null)
            jobs = new int[0];
        else
            jobs = jobList.toArray();

        String state = "free";
        if (numContainers >= numProcs)
            state = "busy";

        //append(Object) is null-safe, unlike calling toString() on a possibly missing resource
        StringBuilder status = new StringBuilder();
        status.append("capacity=").append(node.getCapability());
        status.append(",used=").append(node.getUsed());

        rtr[i] = new PBSNodeStatus(hostname, state, status.toString(), jobs, tracker, node.getHealthReport(),
                rack, yarnState, numProcs, node.getNodeLabels());
    }
    return rtr;
}