Example usage for org.apache.hadoop.yarn.client.api YarnClient createYarnClient

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.client.api YarnClient createYarnClient.

Prototype

@Public
public static YarnClient createYarnClient()

Source Link

Document

Create a new instance of YarnClient.

Usage

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*  w ww  .  ja v  a 2 s  .co  m*/
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:com.epam.hadoop.nv.yarn.Client.java

License:Apache License

Client(String appMasterMainClass, Configuration conf) {
    this.conf = conf;
    this.appMasterMainClass = appMasterMainClass;
    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);/*from   www  .j  av a 2  s  . co  m*/
    opts = new Options();
    opts.addOption("appname", true, "Application Name. Default value - DistributedShell");
    opts.addOption("priority", true, "Application Priority. Default 0");
    opts.addOption("queue", true, "RM Queue in which this application is to be submitted");
    opts.addOption("timeout", true, "Application timeout in milliseconds");
    opts.addOption("master_memory", true,
            "Amount of memory in MB to be requested to run the application master");
    opts.addOption("master_vcores", true,
            "Amount of virtual cores to be requested to run the application master");
    opts.addOption("jar", true, "Jar file containing the application master");
    opts.addOption("shell_command", true, "Shell command to be executed by "
            + "the Application Master. Can only specify either --shell_command " + "or --shell_script");
    opts.addOption("shell_script", true, "Location of the shell script to be "
            + "executed. Can only specify either --shell_command or --shell_script");
    opts.addOption("shell_args", true,
            "Command line args for the shell script." + "Multiple args can be separated by empty space.");
    opts.getOption("shell_args").setArgs(Option.UNLIMITED_VALUES);
    opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs");
    opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers");
    opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command");
    opts.addOption("container_vcores", true,
            "Amount of virtual cores to be requested to run the shell command");
    opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
    opts.addOption("log_properties", true, "log4j.properties file");
    opts.addOption("keep_containers_across_application_attempts", false,
            "Flag to indicate whether to keep containers across application attempts."
                    + " If the flag is true, running containers will not be killed when"
                    + " application attempt fails and these containers will be retrieved by"
                    + " the new application attempt ");
    opts.addOption("debug", false, "Dump out debug information");
    opts.addOption("help", false, "Print usage");

}

From source file:com.flyhz.avengers.framework.application.InitEnvApplication.java

License:Apache License

private void initJar() {
    LOG.info("initJar");
    numTotalContainers = 1;//w  w w.  j  a  va  2s  .c om
    containerMemory = 10;
    if (numTotalContainers == 0) {
        throw new IllegalArgumentException("Cannot run distributed shell with no containers");
    }
    requestPriority = 0;
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    List<NodeReport> clusterNodeReports;
    try {
        clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        LOG.info("Got all node!");
        for (NodeReport node : clusterNodeReports) {
            nodeSet.add(node.getNodeId().getHost());
        }

        LOG.info("initJar Completed setting up app master command {}", initJarCmd);
        numTotalContainers = nodeSet.size();
        for (int i = 0; i < numTotalContainers; i++) {
            ContainerRequest containerAsk = setupContainerAskForRM();
            amRMClient.addContainerRequest(containerAsk);
        }

        numRequestedContainers.set(numTotalContainers);

    } catch (YarnException e) {
        LOG.error("initJarAndClasspath", e);
    } catch (IOException e) {
        LOG.error("initJarAndClasspath", e);
    } finally {
        try {
            yarnClient.close();
        } catch (IOException e) {
        }
    }
}

From source file:com.flyhz.avengers.framework.AvengersAppMaster.java

License:Apache License

private void initJar() {
    LOG.info("initJar");
    initCommon();/*ww  w  .j a v a2  s .c o m*/
    this.currentProcess = "initJar";
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    List<NodeReport> clusterNodeReports;
    try {
        clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        LOG.info("Got all node!");
        for (NodeReport node : clusterNodeReports) {
            LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                    + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                    + node.getNumContainers());
            nodeSet.add(node.getNodeId().getHost());
        }
        Vector<CharSequence> vargs = new Vector<CharSequence>(30);
        String appTempDir = conf.get("hadoop.tmp.dir");
        FileSystem fs = DistributedFileSystem.get(conf);
        String hdfsJar = fs.getHomeDirectory() + "/avengers/" + this.appAttemptID.getApplicationId().getId()
                + "/AvengersAppMaster.jar";
        vargs.add("if [ ! -e " + appTempDir + "/" + this.appAttemptID.getApplicationId().getId()
                + "/AvengersAppMaster.jar" + " ];");
        vargs.add("then");
        vargs.add("/bin/mkdir -p");
        vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";");
        vargs.add(Environment.HADOOP_YARN_HOME.$() + "/bin/hadoop");
        vargs.add("fs");
        vargs.add("-copyToLocal");
        vargs.add(hdfsJar);
        vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";");
        vargs.add("fi");
        // Add log redirect params
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stderr");

        // Get final commmand
        StringBuilder command = new StringBuilder();
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        initJarCmd = command.toString();
        LOG.info("initJar Completed setting up app master command {}", initJarCmd);
        numTotalContainers = nodeSet.size();
        for (int i = 0; i < numTotalContainers; i++) {
            ContainerRequest containerAsk = setupContainerAskForRM();
            amRMClient.addContainerRequest(containerAsk);
        }

        numRequestedContainers.set(numTotalContainers);

        while (!done && (numCompletedContainers.get() != numTotalContainers)) {
            try {
                Thread.sleep(200);
            } catch (InterruptedException ex) {
            }
        }
    } catch (YarnException e) {
        LOG.error("initJarAndClasspath", e);
    } catch (IOException e) {
        LOG.error("initJarAndClasspath", e);
    } finally {
        try {
            yarnClient.close();
        } catch (IOException e) {
        }
    }
}

From source file:com.flyhz.avengers.framework.AvengersClient.java

License:Apache License

AvengersClient(Configuration conf) {

    batchId = System.currentTimeMillis();
    LOG.info("conf is {},batchId is {}", batchId, conf);
    this.conf = conf;
    System.setProperty("HADOOP_USER_NAME", "avengers");
    yarnClient = YarnClient.createYarnClient();
    conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_PORT);

    yarnClient.init(conf);/*from ww w.ja  v a 2  s . c  o  m*/

    opts = new Options();
    opts.addOption("jar", true, "Jar file containing the application master");

    Option all = new Option("all", false, "avengers all:include crawl fetch analyze");
    all.setArgs(0);

    Option crawl = new Option("crawl", true, "avengers crawl");
    crawl.setArgs(1);
    crawl.setArgName("batchId");

    Option fetch = new Option("fetch", true, "avengers fetch");
    fetch.setArgs(1);
    fetch.setArgName("batchId");

    Option analyze = new Option("analyze", true, "avengers analyze");
    analyze.setArgs(1);
    analyze.setArgName("batchId");

    opts.addOption(all);
    opts.addOption(crawl);
    opts.addOption(fetch);
    opts.addOption(analyze);

    avengersProcessCmds = new LinkedHashMap<String, List<String>>();
    this.hdfsPath = "avengers/" + batchId + "/avengers.jar";
}

From source file:com.github.hdl.tensorflow.yarn.app.Client.java

License:Apache License

Client(String appMasterMainClass, Configuration conf) {
    this.conf = conf;
    this.appMasterMainClass = appMasterMainClass;
    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);//  www.ja va2s. c om
    opts = new Options();
    opts.addOption(TFApplication.OPT_TF_APPNAME, true, "Application Name. Default value - tensorflow");
    opts.addOption(TFApplication.OPT_TF_PRIORITY, true, "Application Priority. Default 0");
    opts.addOption(TFApplication.OPT_TF_QUEUE, true, "RM Queue in which this application is to be submitted");
    opts.addOption("jar", true, "Jar file containing the application master");
    opts.addOption(TFApplication.OPT_TF_MASTER_MEMORY, true,
            "Amount of memory in MB to be requested to run the application master");
    opts.addOption(TFApplication.OPT_TF_MASTER_VCORES, true,
            "Amount of virtual cores to be requested to run the application master");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_MEMORY, true,
            "Amount of memory in MB to be requested to run a tensorflow worker");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_VCORES, true,
            "Amount of virtual cores to be requested to run a tensorflow worker");
    opts.addOption(TFApplication.OPT_TF_LOG_PROPERTIES, true, "log4j.properties file");
    opts.addOption(TFApplication.OPT_TF_ATTEMPT_FAILURES_VALIDITY_INTERVAL, true,
            "when attempt_failures_validity_interval in milliseconds is set to > 0,"
                    + "the failure number will not take failures which happen out of "
                    + "the validityInterval into failure count. "
                    + "If failure count reaches to maxAppAttempts, " + "the application will be failed.");
    opts.addOption(TFApplication.OPT_TF_NODE_LABEL_EXPRESSION, true,
            "Node label expression to determine the nodes" + " where all the containers of this application"
                    + " will be allocated, \"\" means containers"
                    + " can be allocated anywhere, if you don't specify the option,"
                    + " default node_label_expression of queue will be used.");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, true,
            "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, "
                    + "2: RETRY_ON_SPECIFIC_ERROR_CODES");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES, true,
            "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error "
                    + "codes is specified with this option, " + "e.g. --container_retry_error_codes 1,2,3");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, true,
            "If container could retry, it specifies max retires");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, true,
            "Interval between each retry, unit is milliseconds");
    opts.addOption(TFApplication.OPT_TF_CLIENT, true, "Provide client python of tensorflow");
    opts.addOption(TFApplication.OPT_TF_WORKER_NUM, true, "worker quantity of tensorflow");
    opts.addOption(TFApplication.OPT_TF_PS_NUM, true, "ps quantity of tensorflow");
    opts.addOption(TFApplication.OPT_TF_JNI_SO, true, "jni so of tensorflow");
    opts.addOption(TFApplication.OPT_TF_ENABLE_TENSORBOARD, true, "flag whether enable the tensorboard");
    opts.addOption(TFApplication.OPT_TF_TENSORBOARD_INPUTDIR, true, "the event storage dir of the tensorboard");
}

From source file:com.github.sakserv.minicluster.simpleyarnapp.Client.java

License:Apache License

public void run(String[] args) throws Exception {
    final String command = args[0];
    final int n = Integer.valueOf(args[1]);
    final Path jarPath = new Path(args[2]);
    final String resourceManagerAddress = args[3];
    final String resourceManagerHostname = args[4];
    final String resourceManagerSchedulerAddress = args[5];
    final String resourceManagerResourceTrackerAddress = args[6];

    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    conf.set("yarn.resourcemanager.address", resourceManagerAddress);
    conf.set("yarn.resourcemanager.hostname", resourceManagerHostname);
    conf.set("yarn.resourcemanager.scheduler.address", resourceManagerSchedulerAddress);
    conf.set("yarn.resourcemanager.resource-tracker.address", resourceManagerResourceTrackerAddress);
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);//from  ww  w.j  av  a  2s .c om
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " com.hortonworks.simpleyarnapp.ApplicationMaster" + " " + command + " " + String.valueOf(n) + " "
            + resourceManagerAddress + " " + resourceManagerHostname + " " + resourceManagerSchedulerAddress
            + " " + resourceManagerResourceTrackerAddress + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
            + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    setupAppMasterJar(jarPath, appMasterJar);
    amContainer.setLocalResources(Collections.singletonMap("simple-yarn-app-1.1.0.jar", appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    setupAppMasterEnv(appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("simple-yarn-app"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue

    // Submit application
    ApplicationId appId = appContext.getApplicationId();
    System.out.println("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());

}

From source file:com.google.mr4c.hadoop.yarn.YarnBinding.java

License:Open Source License

public void addClusterLimits(JobConf jobConf, ResourceConfig resConf) throws IOException {
    if (resConf.isEmpty()) {
        return;//  w ww .j  ava2s  .c o  m
    }
    Configuration conf = new Configuration(jobConf);
    YarnClient client = YarnClient.createYarnClient();
    client.init(conf);
    client.start();
    try {
        for (ResourceLimit limit : computeResourceLimits(client.getNodeReports())) {
            resConf.addLimit(limit);
        }
    } catch (YarnException ye) {
        throw new IOException(ye);
    } finally {
        client.stop();
    }
}

From source file:com.gpiskas.yarn.Client.java

License:Open Source License

private void run() throws Exception {
    conf = new YarnConfiguration();

    // Create Yarn Client
    YarnClient client = YarnClient.createYarnClient();
    client.init(conf);/*from w  w w  .j  a  v  a 2  s .  co  m*/
    client.start();

    // Create Application
    YarnClientApplication app = client.createApplication();

    // Create AM Container
    ContainerLaunchContext amCLC = Records.newRecord(ContainerLaunchContext.class);
    amCLC.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " com.gpiskas.yarn.AppMaster" + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Set AM jar
    LocalResource jar = Records.newRecord(LocalResource.class);
    Utils.setUpLocalResource(Utils.YARNAPP_JAR_PATH, jar, conf);
    amCLC.setLocalResources(Collections.singletonMap(Utils.YARNAPP_JAR_NAME, jar));

    // Set AM CLASSPATH
    Map<String, String> env = new HashMap<String, String>();
    Utils.setUpEnv(env, conf);
    amCLC.setEnvironment(env);

    // Set AM resources
    Resource res = Records.newRecord(Resource.class);
    res.setMemory(256);
    res.setVirtualCores(1);

    // Create ApplicationSubmissionContext
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("YARNAPP");
    appContext.setQueue("default");
    appContext.setAMContainerSpec(amCLC);
    appContext.setResource(res);

    // Submit Application
    ApplicationId id = appContext.getApplicationId();
    System.out.println("Client: Submitting " + id);
    client.submitApplication(appContext);

    ApplicationReport appReport = client.getApplicationReport(id);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(1000);
        appReport = client.getApplicationReport(id);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Client: Finished " + id + " with state " + appState);
}

From source file:com.hazelcast.yarn.HazelcastYarnClient.java

License:Open Source License

private YarnClient startYarnClient(YarnConfiguration conf) {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    return yarnClient;
}