List of usage examples for org.apache.hadoop.yarn.api.records Resource newInstance
@Public @Stable public static Resource newInstance(long memory, int vCores)
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/** * Extract the container resource requirements from the provided configuration, which would * otherwise have been used when running a Hadoop MapReduce mapper. </p> * <p/>/*ww w . j a v a 2 s . co m*/ * This is only meant to be used if frameworks are not setting up their own {@link * org.apache.hadoop.yarn.api.records.Resource} and would like to fallback to using resources * which may already be configured for Hadoop MapReduce mappers. * * @param conf Configuration with MR specific settings used to extract * information from * * @return Resource object used to define requirements for containers * running Map tasks */ public static Resource getResourceForMRMapper(Configuration conf) { return Resource.newInstance(conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB), conf.getInt(MRJobConfig.MAP_CPU_VCORES, MRJobConfig.DEFAULT_MAP_CPU_VCORES)); }
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/** * Extract the container resource requirements from the provided configuration, which would * otherwise have been used when running a Hadoop MapReduce reducer. </p> * <p/>/*ww w. ja va2s. c o m*/ * This is only meant to be used if frameworks are not setting up their own {@link * org.apache.hadoop.yarn.api.records.Resource} and would like to fallback to using resources * which may already be configured for Hadoop MapReduce reducers. * <p/> * Uses mapreduce.reduce.memory.mb and mapreduce.reduce.cpu.vcores from the * provided configuration. * * @param conf Configuration with MR specific settings used to extract * information from * @return Resource object used to define requirements for containers * running Reduce tasks */ public static Resource getResourceForMRReducer(Configuration conf) { return Resource.newInstance(conf.getInt(MRJobConfig.REDUCE_MEMORY_MB, MRJobConfig.DEFAULT_REDUCE_MEMORY_MB), conf.getInt(MRJobConfig.REDUCE_CPU_VCORES, MRJobConfig.DEFAULT_REDUCE_CPU_VCORES)); }
From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java
License:Apache License
@Test(timeout = 60000) public void testSleepJob() throws TezException, IOException, InterruptedException { SleepProcessorConfig spConf = new SleepProcessorConfig(1); DAG dag = DAG.create("TezSleepProcessor"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, Resource.newInstance(1024, 1)); dag.addVertex(vertex);//from w ww .j a va 2s .co m TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000)))); remoteFs.mkdirs(remoteStagingDir); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false); tezSession.start(); DAGClient dagClient = tezSession.submitDAG(dag); DAGStatus dagStatus = dagClient.getDAGStatus(null); while (!dagStatus.isCompleted()) { LOG.info( "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState()); Thread.sleep(500l); dagStatus = dagClient.getDAGStatus(null); } dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); assertNotNull(dagStatus.getDAGCounters()); assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName())); assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS)); ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true); tezSession.stop(); }
From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java
License:Apache License
@Test(timeout = 100000) public void testMultipleDAGsWithDuplicateName() throws TezException, IOException, InterruptedException { TezClient tezSession = null;/*from w ww . java 2 s. c o m*/ try { TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); Path remoteStagingDir = remoteFs .makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000)))); remoteFs.mkdirs(remoteStagingDir); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); tezSession = TezClient.create("OrderedWordCountSession", tezConf, true); tezSession.start(); SleepProcessorConfig spConf = new SleepProcessorConfig(1); for (int dagIndex = 1; dagIndex <= 2; dagIndex++) { DAG dag = DAG.create("TezSleepProcessor"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor .create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, Resource.newInstance(1024, 1)); dag.addVertex(vertex); DAGClient dagClient = null; try { dagClient = tezSession.submitDAG(dag); if (dagIndex > 1) { fail("Should fail due to duplicate dag name for dagIndex: " + dagIndex); } } catch (TezException tex) { if (dagIndex > 1) { assertTrue(tex.getMessage().contains("Duplicate dag name ")); continue; } fail("DuplicateDAGName exception thrown for 1st DAG submission"); } DAGStatus dagStatus = dagClient.getDAGStatus(null); while (!dagStatus.isCompleted()) { LOG.debug("Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState()); Thread.sleep(500l); dagStatus = dagClient.getDAGStatus(null); } } } finally { if (tezSession != null) { tezSession.stop(); } } }
From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java
License:Apache License
@Test(timeout = 60000) public void testNonDefaultFSStagingDir() throws Exception { SleepProcessorConfig spConf = new SleepProcessorConfig(1); DAG dag = DAG.create("TezSleepProcessor"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, Resource.newInstance(1024, 1)); dag.addVertex(vertex);/*from w w w .ja v a2 s. c om*/ TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); Path stagingDir = new Path(TEST_ROOT_DIR, "testNonDefaultFSStagingDir" + String.valueOf(random.nextInt(100000))); FileSystem localFs = FileSystem.getLocal(tezConf); stagingDir = localFs.makeQualified(stagingDir); localFs.mkdirs(stagingDir); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString()); TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false); tezSession.start(); DAGClient dagClient = tezSession.submitDAG(dag); DAGStatus dagStatus = dagClient.getDAGStatus(null); while (!dagStatus.isCompleted()) { LOG.info( "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState()); Thread.sleep(500l); dagStatus = dagClient.getDAGStatus(null); } dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); assertNotNull(dagStatus.getDAGCounters()); assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName())); assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS)); ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true); tezSession.stop(); }
From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java
License:Apache License
@Test(timeout = 60000) public void testHistoryLogging() throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException { SleepProcessorConfig spConf = new SleepProcessorConfig(1); DAG dag = DAG.create("TezSleepProcessorHistoryLogging"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2, Resource.newInstance(1024, 1)); dag.addVertex(vertex);//w w w . j av a2 s. c om TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000)))); remoteFs.mkdirs(remoteStagingDir); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); FileSystem localFs = FileSystem.getLocal(tezConf); Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging"); localFs.mkdirs(historyLogDir); tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR, localFs.makeQualified(historyLogDir).toString()); tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false); TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf); tezSession.start(); DAGClient dagClient = tezSession.submitDAG(dag); DAGStatus dagStatus = dagClient.getDAGStatus(null); while (!dagStatus.isCompleted()) { LOG.info( "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState()); Thread.sleep(500l); dagStatus = dagClient.getDAGStatus(null); } assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); FileStatus historyLogFileStatus = null; for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) { if (fileStatus.isDirectory()) { continue; } Path p = fileStatus.getPath(); if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) { historyLogFileStatus = fileStatus; break; } } Assert.assertNotNull(historyLogFileStatus); Assert.assertTrue(historyLogFileStatus.getLen() > 0); tezSession.stop(); }
From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java
License:Apache License
public State testMRRSleepJobDagSubmitCore(boolean dagViaRPC, boolean killDagWhileRunning, boolean closeSessionBeforeSubmit, TezClient reUseTezSession, boolean genSplitsInAM, Class<? extends InputInitializer> initializerClass, Map<String, LocalResource> additionalLocalResources) throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException { LOG.info("\n\n\nStarting testMRRSleepJobDagSubmit()."); JobConf stage1Conf = new JobConf(mrrTezCluster.getConfig()); JobConf stage2Conf = new JobConf(mrrTezCluster.getConfig()); JobConf stage3Conf = new JobConf(mrrTezCluster.getConfig()); stage1Conf.setLong(MRRSleepJob.MAP_SLEEP_TIME, 1); stage1Conf.setInt(MRRSleepJob.MAP_SLEEP_COUNT, 1); stage1Conf.setInt(MRJobConfig.NUM_MAPS, 1); stage1Conf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName()); stage1Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName()); stage1Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName()); stage1Conf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName()); stage1Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName()); stage2Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1); stage2Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1); stage2Conf.setInt(MRJobConfig.NUM_REDUCES, 1); stage2Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName()); stage2Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName()); stage2Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName()); stage2Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName()); stage3Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1); stage3Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1); stage3Conf.setInt(MRJobConfig.NUM_REDUCES, 1); stage3Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName()); stage3Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName()); stage3Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName()); MRHelpers.translateMRConfToTez(stage1Conf); MRHelpers.translateMRConfToTez(stage2Conf); MRHelpers.translateMRConfToTez(stage3Conf); MRHelpers.configureMRApiUsage(stage1Conf); MRHelpers.configureMRApiUsage(stage2Conf); MRHelpers.configureMRApiUsage(stage3Conf); Path remoteStagingDir = remoteFs .makeQualified(new Path("/tmp", String.valueOf(new Random().nextInt(100000)))); TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir); UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf); UserPayload stage2Payload = TezUtils.createUserPayloadFromConf(stage2Conf); UserPayload stage3Payload = TezUtils.createUserPayloadFromConf(stage3Conf); DAG dag = DAG.create("testMRRSleepJobDagSubmit-" + random.nextInt(1000)); Class<? extends InputInitializer> inputInitializerClazz = genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass) : null;// w w w .j av a 2s. co m LOG.info("Using initializer class: " + initializerClass); DataSourceDescriptor dsd; if (!genSplitsInAM) { dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, remoteStagingDir, true); } else { if (initializerClass == null) { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class).build(); } else { InputInitializerDescriptor iid = InputInitializerDescriptor.create(inputInitializerClazz.getName()); dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class) .setCustomInitializerDescriptor(iid).build(); } } Vertex stage1Vertex = Vertex.create("map", ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(stage1Payload), dsd.getNumberOfShards(), Resource.newInstance(256, 1)); stage1Vertex.addDataSource("MRInput", dsd); Vertex stage2Vertex = Vertex.create("ireduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage2Payload), 1, Resource.newInstance(256, 1)); Vertex stage3Vertex = Vertex.create("reduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage3Payload), 1, Resource.newInstance(256, 1)); stage3Conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, true); DataSinkDescriptor dataSinkDescriptor = MROutputLegacy .createConfigBuilder(stage3Conf, NullOutputFormat.class).build(); Assert.assertFalse(dataSinkDescriptor.getOutputDescriptor().getHistoryText().isEmpty()); stage3Vertex.addDataSink("MROutput", dataSinkDescriptor); // TODO env, resources dag.addVertex(stage1Vertex); dag.addVertex(stage2Vertex); dag.addVertex(stage3Vertex); Edge edge1 = Edge.create(stage1Vertex, stage2Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage2Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage2Payload))); Edge edge2 = Edge.create(stage2Vertex, stage3Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage3Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage3Payload))); dag.addEdge(edge1); dag.addEdge(edge2); TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); DAGClient dagClient = null; boolean reuseSession = reUseTezSession != null; TezClient tezSession = null; if (!dagViaRPC) { Preconditions.checkArgument(reuseSession == false); } if (!reuseSession) { TezConfiguration tempTezconf = new TezConfiguration(tezConf); if (!dagViaRPC) { tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false); } else { tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true); } tezSession = TezClient.create("testsession", tempTezconf); tezSession.start(); } else { tezSession = reUseTezSession; } if (!dagViaRPC) { // TODO Use utility method post TEZ-205 to figure out AM arguments etc. dagClient = tezSession.submitDAG(dag); } if (dagViaRPC && closeSessionBeforeSubmit) { YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(mrrTezCluster.getConfig()); yarnClient.start(); boolean sentKillSession = false; while (true) { Thread.sleep(500l); ApplicationReport appReport = yarnClient .getApplicationReport(tezSession.getAppMasterApplicationId()); if (appReport == null) { continue; } YarnApplicationState appState = appReport.getYarnApplicationState(); if (!sentKillSession) { if (appState == YarnApplicationState.RUNNING) { tezSession.stop(); sentKillSession = true; } } else { if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.KILLED || appState == YarnApplicationState.FAILED) { LOG.info("Application completed after sending session shutdown" + ", yarnApplicationState=" + appState + ", finalAppStatus=" + appReport.getFinalApplicationStatus()); Assert.assertEquals(YarnApplicationState.FINISHED, appState); Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus()); break; } } } yarnClient.stop(); return null; } if (dagViaRPC) { LOG.info("Submitting dag to tez session with appId=" + tezSession.getAppMasterApplicationId() + " and Dag Name=" + dag.getName()); if (additionalLocalResources != null) { tezSession.addAppMasterLocalFiles(additionalLocalResources); } dagClient = tezSession.submitDAG(dag); Assert.assertEquals(TezAppMasterStatus.RUNNING, tezSession.getAppMasterStatus()); } DAGStatus dagStatus = dagClient.getDAGStatus(null); while (!dagStatus.isCompleted()) { LOG.info( "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState()); Thread.sleep(500l); if (killDagWhileRunning && dagStatus.getState() == DAGStatus.State.RUNNING) { LOG.info("Killing running dag/session"); if (dagViaRPC) { tezSession.stop(); } else { dagClient.tryKillDAG(); } } dagStatus = dagClient.getDAGStatus(null); } if (!reuseSession) { tezSession.stop(); } return dagStatus.getState(); }
From source file:org.apache.twill.internal.yarn.Hadoop21YarnAppClient.java
License:Apache License
@Override public ProcessLauncher<ApplicationMasterInfo> createLauncher(TwillSpecification twillSpec, @Nullable String schedulerQueue) throws Exception { // Request for new application YarnClientApplication application = yarnClient.createApplication(); final GetNewApplicationResponse response = application.getNewApplicationResponse(); final ApplicationId appId = response.getApplicationId(); // Setup the context for application submission final ApplicationSubmissionContext appSubmissionContext = application.getApplicationSubmissionContext(); appSubmissionContext.setApplicationId(appId); appSubmissionContext.setApplicationName(twillSpec.getName()); if (schedulerQueue != null) { appSubmissionContext.setQueue(schedulerQueue); }/*from ww w . ja va 2s . com*/ // TODO: Make it adjustable through TwillSpec (TWILL-90) // Set the resource requirement for AM final Resource capability = adjustMemory(response, Resource.newInstance(Constants.APP_MASTER_MEMORY_MB, 1)); ApplicationMasterInfo appMasterInfo = new ApplicationMasterInfo(appId, capability.getMemory(), capability.getVirtualCores()); ApplicationSubmitter submitter = new ApplicationSubmitter() { @Override public ProcessController<YarnApplicationReport> submit(YarnLaunchContext context) { ContainerLaunchContext launchContext = context.getLaunchContext(); appSubmissionContext.setAMContainerSpec(launchContext); appSubmissionContext.setResource(capability); appSubmissionContext.setMaxAppAttempts(2); try { yarnClient.submitApplication(appSubmissionContext); return new ProcessControllerImpl(yarnClient, appId); } catch (Exception e) { LOG.error("Failed to submit application {}", appId, e); throw Throwables.propagate(e); } } }; return new ApplicationMasterProcessLauncher(appMasterInfo, submitter); }
From source file:org.conan.myhadoop02.mr.yarntest.Client.java
License:Apache License
/** * Main run function for the client/* w w w. j a v a 2 s .c om*/ * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } if (domainId != null && domainId.length() > 0 && toCreateDomain) { prepareTimelineDomain(); } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); if (attemptFailuresValidityInterval >= 0) { appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); if (domainId != null && domainId.length() > 0) { env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId); } // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); } vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:org.hdl.caffe.yarn.app.ApplicationMaster.java
License:Apache License
/** * Setup the request that will be sent to the RM for the container ask. * * @return the setup ResourceRequest to be sent to RM *///from w w w . j a va2 s. c o m private ContainerRequest setupContainerAskForRM() { // setup requirements for hosts // using * as any host will do for the distributed shell app // set the priority for the request Priority pri = Priority.newInstance(requestPriority); // Set up resource type requirements // For now, memory and CPU are supported so we set memory and cpu requirements Resource capability = Resource.newInstance(containerMemory, containerVirtualCores); ContainerRequest request = new ContainerRequest(capability, null, null, pri); return request; }