List of usage examples for org.apache.hadoop.yarn.client.api YarnClient createYarnClient
@Public public static YarnClient createYarnClient()
From source file:com.datatorrent.stram.StreamingAppMasterService.java
License:Apache License
/** * Main run function for the application master * * @throws YarnException/* w ww . ja v a 2 s .co m*/ */ @SuppressWarnings("SleepWhileInLoop") private void execute() throws YarnException, IOException { LOG.info("Starting ApplicationMaster"); final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials(); LOG.info("number of tokens: {}", credentials.getAllTokens().size()); Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); LOG.debug("token: {}", token); } final Configuration conf = getConfig(); long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME))); long expiryTime = System.currentTimeMillis() + tokenLifeTime; LOG.debug(" expiry token time {}", tokenLifeTime); String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE); // Register self with ResourceManager RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0, appMasterTrackingUrl); // Dump out information about cluster capability as seen by the resource manager int maxMem = response.getMaximumResourceCapability().getMemory(); int maxVcores = response.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores); // for locality relaxation fall back Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps .newHashMap(); // Setup heartbeat emitter // TODO poll RM every now and then with an empty request to let RM know that we are alive // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting: // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter // is not required. int loopCounter = -1; List<ContainerId> releasedContainers = new ArrayList<ContainerId>(); int numTotalContainers = 0; // keep track of already requested containers to not request them again while waiting for allocation int numRequestedContainers = 0; int numReleasedContainers = 0; int nextRequestPriority = 0; ResourceRequestHandler resourceRequestor = new ResourceRequestHandler(); YarnClient clientRMService = YarnClient.createYarnClient(); try { // YARN-435 // we need getClusterNodes to populate the initial node list, // subsequent updates come through the heartbeat response clientRMService.init(conf); clientRMService.start(); ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService, dag.getAttributes().get(DAG.APPLICATION_NAME), UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID)); if (ar != null) { appDone = true; dnmgr.shutdownDiagnosticsMessage = String.format( "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.", ar.getApplicationId().toString(), ar.getName(), ar.getUser()); LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finishApplication(FinalApplicationStatus.FAILED, numTotalContainers); return; } resourceRequestor.updateNodeReports(clientRMService.getNodeReports()); } catch (Exception e) { throw new RuntimeException("Failed to retrieve cluster nodes report.", e); } finally { clientRMService.stop(); } // check for previously allocated containers // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490 checkContainerStatus(); FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); while (!appDone) { loopCounter++; if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime && hdfsKeyTabFile != null) { String applicationId = appAttemptID.getApplicationId().toString(); expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp", applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true); } Runnable r; while ((r = this.pendingTasks.poll()) != null) { r.run(); } // log current state /* * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers + * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size()); */ // Sleep before each loop when asking RM for containers // to avoid flooding RM with spurious requests when it // need not have any available containers try { sleep(1000); } catch (InterruptedException e) { LOG.info("Sleep interrupted " + e.getMessage()); } // Setup request to be sent to RM to allocate containers List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>(); List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>(); // request containers for pending deploy requests if (!dnmgr.containerStartRequests.isEmpty()) { StreamingContainerAgent.ContainerStartRequest csr; while ((csr = dnmgr.containerStartRequests.poll()) != null) { if (csr.container.getRequiredMemoryMB() > maxMem) { LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.", csr.container.getRequiredMemoryMB(), maxMem); csr.container.setRequiredMemoryMB(maxMem); } if (csr.container.getRequiredVCores() > maxVcores) { LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.", csr.container.getRequiredVCores(), maxVcores); csr.container.setRequiredVCores(maxVcores); } csr.container.setResourceRequestPriority(nextRequestPriority++); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true); MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>( loopCounter, cr); requestedResources.put(csr, pair); containerRequests.add(cr); } } if (!requestedResources.isEmpty()) { //resourceRequestor.clearNodeMapping(); for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) { StreamingContainerAgent.ContainerStartRequest csr = entry.getKey(); removedContainerRequests.add(entry.getValue().getRight()); ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false); entry.getValue().setLeft(loopCounter); entry.getValue().setRight(cr); containerRequests.add(cr); } } } numTotalContainers += containerRequests.size(); numRequestedContainers += containerRequests.size(); AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests, releasedContainers); if (amResp.getAMCommand() != null) { LOG.info(" statement executed:{}", amResp.getAMCommand()); switch (amResp.getAMCommand()) { case AM_RESYNC: case AM_SHUTDOWN: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); default: throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM"); } } releasedContainers.clear(); // Retrieve list of allocated containers from the response List<Container> newAllocatedContainers = amResp.getAllocatedContainers(); // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size()); numRequestedContainers -= newAllocatedContainers.size(); long timestamp = System.currentTimeMillis(); for (Container allocatedContainer : newAllocatedContainers) { LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode=" + allocatedContainer.getNodeId() + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory" + allocatedContainer.getResource().getMemory() + ", priority" + allocatedContainer.getPriority()); // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString()); boolean alreadyAllocated = true; StreamingContainerAgent.ContainerStartRequest csr = null; for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources .entrySet()) { if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority() .getPriority()) { alreadyAllocated = false; csr = entry.getKey(); break; } } if (alreadyAllocated) { LOG.info("Releasing {} as resource with priority {} was already assigned", allocatedContainer.getId(), allocatedContainer.getPriority()); releasedContainers.add(allocatedContainer.getId()); numReleasedContainers++; numRequestedContainers++; continue; } if (csr != null) { requestedResources.remove(csr); } // allocate resource to container ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(), allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(), allocatedContainer.getResource().getMemory(), allocatedContainer.getResource().getVirtualCores(), allocatedContainer.getNodeHttpAddress()); StreamingContainerAgent sca = dnmgr.assignContainer(resource, null); if (sca == null) { // allocated container no longer needed, add release request LOG.warn("Container {} allocated but nothing to deploy, going to release this container.", allocatedContainer.getId()); releasedContainers.add(allocatedContainer.getId()); } else { AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer); this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder); ByteBuffer tokens = null; if (UserGroupInformation.isSecurityEnabled()) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken( ugi.getUserName(), heartbeatListener.getAddress()); allocatedContainerHolder.delegationToken = delegationToken; //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress()); tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken); } LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer, nmClient, sca, tokens); // Thread launchThread = new Thread(runnableLaunchContainer); // launchThreads.add(launchThread); // launchThread.start(); launchContainer.run(); // communication with NMs is now async // record container start event StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString()); ev.setTimestamp(timestamp); dnmgr.recordEventAsync(ev); } } // track node updates for future locality constraint allocations // TODO: it seems 2.0.4-alpha doesn't give us any updates resourceRequestor.updateNodeReports(amResp.getUpdatedNodes()); // Check the completed containers List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses(); // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics()); // non complete containers should not be here assert (containerStatus.getState() == ContainerState.COMPLETE); AllocatedContainer allocatedContainer = allocatedContainers .remove(containerStatus.getContainerId().toString()); if (allocatedContainer != null && allocatedContainer.delegationToken != null) { UserGroupInformation ugi = UserGroupInformation.getLoginUser(); delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName()); } int exitStatus = containerStatus.getExitStatus(); if (0 != exitStatus) { if (allocatedContainer != null) { numFailedContainers.incrementAndGet(); } // if (exitStatus == 1) { // // non-recoverable StreamingContainer failure // appDone = true; // finalStatus = FinalApplicationStatus.FAILED; // dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId(); // LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage); // } // else { // Recoverable failure or process killed (externally or via stop request by AM) // also occurs when a container was released by the application but never assigned/launched LOG.debug("Container {} failed or killed.", containerStatus.getContainerId()); dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString()); // } } else { // container completed successfully numCompletedContainers.incrementAndGet(); LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId()); } String containerIdStr = containerStatus.getContainerId().toString(); dnmgr.removeContainerAgent(containerIdStr); // record container stop event StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus()); ev.setReason(containerStatus.getDiagnostics()); dnmgr.recordEventAsync(ev); } if (dnmgr.forcedShutdown) { LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage); finalStatus = FinalApplicationStatus.FAILED; appDone = true; } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0 && dnmgr.containerStartRequests.isEmpty()) { LOG.debug("Exiting as no more containers are allocated or requested"); finalStatus = FinalApplicationStatus.SUCCEEDED; appDone = true; } LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" + numTotalContainers + ", requested=" + numRequestedContainers + ", released=" + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed=" + numFailedContainers + ", currentAllocated=" + allocatedContainers.size()); // monitor child containers dnmgr.monitorHeartbeat(); } finishApplication(finalStatus, numTotalContainers); }
From source file:com.epam.hadoop.nv.yarn.Client.java
License:Apache License
Client(String appMasterMainClass, Configuration conf) { this.conf = conf; this.appMasterMainClass = appMasterMainClass; yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf);/*from www .j av a 2 s . co m*/ opts = new Options(); opts.addOption("appname", true, "Application Name. Default value - DistributedShell"); opts.addOption("priority", true, "Application Priority. Default 0"); opts.addOption("queue", true, "RM Queue in which this application is to be submitted"); opts.addOption("timeout", true, "Application timeout in milliseconds"); opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); opts.addOption("master_vcores", true, "Amount of virtual cores to be requested to run the application master"); opts.addOption("jar", true, "Jar file containing the application master"); opts.addOption("shell_command", true, "Shell command to be executed by " + "the Application Master. Can only specify either --shell_command " + "or --shell_script"); opts.addOption("shell_script", true, "Location of the shell script to be " + "executed. Can only specify either --shell_command or --shell_script"); opts.addOption("shell_args", true, "Command line args for the shell script." + "Multiple args can be separated by empty space."); opts.getOption("shell_args").setArgs(Option.UNLIMITED_VALUES); opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs"); opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command"); opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); opts.addOption("log_properties", true, "log4j.properties file"); opts.addOption("keep_containers_across_application_attempts", false, "Flag to indicate whether to keep containers across application attempts." + " If the flag is true, running containers will not be killed when" + " application attempt fails and these containers will be retrieved by" + " the new application attempt "); opts.addOption("debug", false, "Dump out debug information"); opts.addOption("help", false, "Print usage"); }
From source file:com.flyhz.avengers.framework.application.InitEnvApplication.java
License:Apache License
private void initJar() { LOG.info("initJar"); numTotalContainers = 1;//w w w. j a va 2s .c om containerMemory = 10; if (numTotalContainers == 0) { throw new IllegalArgumentException("Cannot run distributed shell with no containers"); } requestPriority = 0; YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); List<NodeReport> clusterNodeReports; try { clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got all node!"); for (NodeReport node : clusterNodeReports) { nodeSet.add(node.getNodeId().getHost()); } LOG.info("initJar Completed setting up app master command {}", initJarCmd); numTotalContainers = nodeSet.size(); for (int i = 0; i < numTotalContainers; i++) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } numRequestedContainers.set(numTotalContainers); } catch (YarnException e) { LOG.error("initJarAndClasspath", e); } catch (IOException e) { LOG.error("initJarAndClasspath", e); } finally { try { yarnClient.close(); } catch (IOException e) { } } }
From source file:com.flyhz.avengers.framework.AvengersAppMaster.java
License:Apache License
private void initJar() { LOG.info("initJar"); initCommon();/*ww w .j a v a2 s .c o m*/ this.currentProcess = "initJar"; YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); List<NodeReport> clusterNodeReports; try { clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got all node!"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); nodeSet.add(node.getNodeId().getHost()); } Vector<CharSequence> vargs = new Vector<CharSequence>(30); String appTempDir = conf.get("hadoop.tmp.dir"); FileSystem fs = DistributedFileSystem.get(conf); String hdfsJar = fs.getHomeDirectory() + "/avengers/" + this.appAttemptID.getApplicationId().getId() + "/AvengersAppMaster.jar"; vargs.add("if [ ! -e " + appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + "/AvengersAppMaster.jar" + " ];"); vargs.add("then"); vargs.add("/bin/mkdir -p"); vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";"); vargs.add(Environment.HADOOP_YARN_HOME.$() + "/bin/hadoop"); vargs.add("fs"); vargs.add("-copyToLocal"); vargs.add(hdfsJar); vargs.add(appTempDir + "/" + this.appAttemptID.getApplicationId().getId() + ";"); vargs.add("fi"); // Add log redirect params vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Avengers.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } initJarCmd = command.toString(); LOG.info("initJar Completed setting up app master command {}", initJarCmd); numTotalContainers = nodeSet.size(); for (int i = 0; i < numTotalContainers; i++) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } numRequestedContainers.set(numTotalContainers); while (!done && (numCompletedContainers.get() != numTotalContainers)) { try { Thread.sleep(200); } catch (InterruptedException ex) { } } } catch (YarnException e) { LOG.error("initJarAndClasspath", e); } catch (IOException e) { LOG.error("initJarAndClasspath", e); } finally { try { yarnClient.close(); } catch (IOException e) { } } }
From source file:com.flyhz.avengers.framework.AvengersClient.java
License:Apache License
AvengersClient(Configuration conf) { batchId = System.currentTimeMillis(); LOG.info("conf is {},batchId is {}", batchId, conf); this.conf = conf; System.setProperty("HADOOP_USER_NAME", "avengers"); yarnClient = YarnClient.createYarnClient(); conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); yarnClient.init(conf);/*from ww w.ja v a 2 s . c o m*/ opts = new Options(); opts.addOption("jar", true, "Jar file containing the application master"); Option all = new Option("all", false, "avengers all:include crawl fetch analyze"); all.setArgs(0); Option crawl = new Option("crawl", true, "avengers crawl"); crawl.setArgs(1); crawl.setArgName("batchId"); Option fetch = new Option("fetch", true, "avengers fetch"); fetch.setArgs(1); fetch.setArgName("batchId"); Option analyze = new Option("analyze", true, "avengers analyze"); analyze.setArgs(1); analyze.setArgName("batchId"); opts.addOption(all); opts.addOption(crawl); opts.addOption(fetch); opts.addOption(analyze); avengersProcessCmds = new LinkedHashMap<String, List<String>>(); this.hdfsPath = "avengers/" + batchId + "/avengers.jar"; }
From source file:com.github.hdl.tensorflow.yarn.app.Client.java
License:Apache License
Client(String appMasterMainClass, Configuration conf) { this.conf = conf; this.appMasterMainClass = appMasterMainClass; yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf);// www.ja va2s. c om opts = new Options(); opts.addOption(TFApplication.OPT_TF_APPNAME, true, "Application Name. Default value - tensorflow"); opts.addOption(TFApplication.OPT_TF_PRIORITY, true, "Application Priority. Default 0"); opts.addOption(TFApplication.OPT_TF_QUEUE, true, "RM Queue in which this application is to be submitted"); opts.addOption("jar", true, "Jar file containing the application master"); opts.addOption(TFApplication.OPT_TF_MASTER_MEMORY, true, "Amount of memory in MB to be requested to run the application master"); opts.addOption(TFApplication.OPT_TF_MASTER_VCORES, true, "Amount of virtual cores to be requested to run the application master"); opts.addOption(TFApplication.OPT_TF_CONTAINER_MEMORY, true, "Amount of memory in MB to be requested to run a tensorflow worker"); opts.addOption(TFApplication.OPT_TF_CONTAINER_VCORES, true, "Amount of virtual cores to be requested to run a tensorflow worker"); opts.addOption(TFApplication.OPT_TF_LOG_PROPERTIES, true, "log4j.properties file"); opts.addOption(TFApplication.OPT_TF_ATTEMPT_FAILURES_VALIDITY_INTERVAL, true, "when attempt_failures_validity_interval in milliseconds is set to > 0," + "the failure number will not take failures which happen out of " + "the validityInterval into failure count. " + "If failure count reaches to maxAppAttempts, " + "the application will be failed."); opts.addOption(TFApplication.OPT_TF_NODE_LABEL_EXPRESSION, true, "Node label expression to determine the nodes" + " where all the containers of this application" + " will be allocated, \"\" means containers" + " can be allocated anywhere, if you don't specify the option," + " default node_label_expression of queue will be used."); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, true, "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, " + "2: RETRY_ON_SPECIFIC_ERROR_CODES"); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES, true, "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error " + "codes is specified with this option, " + "e.g. --container_retry_error_codes 1,2,3"); opts.addOption(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, true, "If container could retry, it specifies max retires"); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, true, "Interval between each retry, unit is milliseconds"); opts.addOption(TFApplication.OPT_TF_CLIENT, true, "Provide client python of tensorflow"); opts.addOption(TFApplication.OPT_TF_WORKER_NUM, true, "worker quantity of tensorflow"); opts.addOption(TFApplication.OPT_TF_PS_NUM, true, "ps quantity of tensorflow"); opts.addOption(TFApplication.OPT_TF_JNI_SO, true, "jni so of tensorflow"); opts.addOption(TFApplication.OPT_TF_ENABLE_TENSORBOARD, true, "flag whether enable the tensorboard"); opts.addOption(TFApplication.OPT_TF_TENSORBOARD_INPUTDIR, true, "the event storage dir of the tensorboard"); }
From source file:com.github.sakserv.minicluster.simpleyarnapp.Client.java
License:Apache License
public void run(String[] args) throws Exception { final String command = args[0]; final int n = Integer.valueOf(args[1]); final Path jarPath = new Path(args[2]); final String resourceManagerAddress = args[3]; final String resourceManagerHostname = args[4]; final String resourceManagerSchedulerAddress = args[5]; final String resourceManagerResourceTrackerAddress = args[6]; // Create yarnClient YarnConfiguration conf = new YarnConfiguration(); conf.set("yarn.resourcemanager.address", resourceManagerAddress); conf.set("yarn.resourcemanager.hostname", resourceManagerHostname); conf.set("yarn.resourcemanager.scheduler.address", resourceManagerSchedulerAddress); conf.set("yarn.resourcemanager.resource-tracker.address", resourceManagerResourceTrackerAddress); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf);//from ww w.j av a 2s .c om yarnClient.start(); // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M" + " com.hortonworks.simpleyarnapp.ApplicationMaster" + " " + command + " " + String.valueOf(n) + " " + resourceManagerAddress + " " + resourceManagerHostname + " " + resourceManagerSchedulerAddress + " " + resourceManagerResourceTrackerAddress + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Setup jar for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); setupAppMasterJar(jarPath, appMasterJar); amContainer.setLocalResources(Collections.singletonMap("simple-yarn-app-1.1.0.jar", appMasterJar)); // Setup CLASSPATH for ApplicationMaster Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); capability.setVirtualCores(1); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("simple-yarn-app"); // application name appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue("default"); // queue // Submit application ApplicationId appId = appContext.getApplicationId(); System.out.println("Submitting application " + appId); yarnClient.submitApplication(appContext); ApplicationReport appReport = yarnClient.getApplicationReport(appId); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { Thread.sleep(100); appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); } System.out.println("Application " + appId + " finished with" + " state " + appState + " at " + appReport.getFinishTime()); }
From source file:com.google.mr4c.hadoop.yarn.YarnBinding.java
License:Open Source License
public void addClusterLimits(JobConf jobConf, ResourceConfig resConf) throws IOException { if (resConf.isEmpty()) { return;// w ww .j ava2s .c o m } Configuration conf = new Configuration(jobConf); YarnClient client = YarnClient.createYarnClient(); client.init(conf); client.start(); try { for (ResourceLimit limit : computeResourceLimits(client.getNodeReports())) { resConf.addLimit(limit); } } catch (YarnException ye) { throw new IOException(ye); } finally { client.stop(); } }
From source file:com.gpiskas.yarn.Client.java
License:Open Source License
private void run() throws Exception { conf = new YarnConfiguration(); // Create Yarn Client YarnClient client = YarnClient.createYarnClient(); client.init(conf);/*from w w w .j a v a 2 s . co m*/ client.start(); // Create Application YarnClientApplication app = client.createApplication(); // Create AM Container ContainerLaunchContext amCLC = Records.newRecord(ContainerLaunchContext.class); amCLC.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M" + " com.gpiskas.yarn.AppMaster" + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")); // Set AM jar LocalResource jar = Records.newRecord(LocalResource.class); Utils.setUpLocalResource(Utils.YARNAPP_JAR_PATH, jar, conf); amCLC.setLocalResources(Collections.singletonMap(Utils.YARNAPP_JAR_NAME, jar)); // Set AM CLASSPATH Map<String, String> env = new HashMap<String, String>(); Utils.setUpEnv(env, conf); amCLC.setEnvironment(env); // Set AM resources Resource res = Records.newRecord(Resource.class); res.setMemory(256); res.setVirtualCores(1); // Create ApplicationSubmissionContext ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName("YARNAPP"); appContext.setQueue("default"); appContext.setAMContainerSpec(amCLC); appContext.setResource(res); // Submit Application ApplicationId id = appContext.getApplicationId(); System.out.println("Client: Submitting " + id); client.submitApplication(appContext); ApplicationReport appReport = client.getApplicationReport(id); YarnApplicationState appState = appReport.getYarnApplicationState(); while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) { Thread.sleep(1000); appReport = client.getApplicationReport(id); appState = appReport.getYarnApplicationState(); } System.out.println("Client: Finished " + id + " with state " + appState); }
From source file:com.hazelcast.yarn.HazelcastYarnClient.java
License:Open Source License
private YarnClient startYarnClient(YarnConfiguration conf) { YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); return yarnClient; }