List of usage examples for the org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl constructor NMClientAsyncImpl(String, CallbackHandler)
@Deprecated
public NMClientAsyncImpl(String name, CallbackHandler callbackHandler)
From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java
License:Apache License
/** * Create and run the cluster./*from ww w. ja v a 2 s.co m*/ * @return exit code * @throws Throwable on a failure */ private int createAndRunCluster(String clustername) throws Throwable { HoyaVersionInfo.loadAndPrintVersionInfo(log); //load the cluster description from the cd argument String hoyaClusterDir = serviceArgs.getHoyaClusterURI(); URI hoyaClusterURI = new URI(hoyaClusterDir); Path clusterDirPath = new Path(hoyaClusterURI); HoyaFileSystem fs = getClusterFS(); // build up information about the running application -this // will be passed down to the cluster status MapOperations appInformation = new MapOperations(); AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath); log.info("Deploying cluster {}:", instanceDefinition); //REVISIT: why is this done? appState.updateInstanceDefinition(instanceDefinition); File confDir = getLocalConfDir(); if (!confDir.exists() || !confDir.isDirectory()) { log.error("Bad conf dir {}", confDir); File parentFile = confDir.getParentFile(); log.error("Parent dir {}:\n{}", parentFile, HoyaUtils.listDir(parentFile)); throw new BadCommandArgumentsException("Configuration directory %s doesn't exist", confDir); } Configuration serviceConf = getConfig(); // Try to get the proper filtering of static resources through the yarn proxy working serviceConf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer"); conf = new YarnConfiguration(serviceConf); //get our provider MapOperations globalOptions = instanceDefinition.getInternalOperations().getGlobalOptions(); String providerType = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_PROVIDER_NAME); log.info("Cluster provider type is {}", providerType); HoyaProviderFactory factory = HoyaProviderFactory.createHoyaProviderFactory(providerType); providerService = factory.createServerProvider(); // init the provider BUT DO NOT START IT YET providerService.init(getConfig()); 
addService(providerService); InetSocketAddress address = HoyaUtils.getRmSchedulerAddress(conf); log.info("RM is at {}", address); yarnRPC = YarnRPC.create(conf); /* * Extract the container ID. This is then * turned into an (incompete) container */ appMasterContainerID = ConverterUtils.toContainerId( HoyaUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name())); appAttemptID = appMasterContainerID.getApplicationAttemptId(); ApplicationId appid = appAttemptID.getApplicationId(); log.info("Hoya AM for ID {}", appid.getId()); appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString()); appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString()); appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString()); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); Credentials credentials = currentUser.getCredentials(); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); dob.close(); // Now remove the AM->RM token so that containers cannot access it. Iterator<Token<?>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<?> token = iter.next(); log.info("Token {}", token.getKind()); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { iter.remove(); } } allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); // set up secret manager secretManager = new ClientToAMTokenSecretManager(appAttemptID, null); // if not a secure cluster, extract the username -it will be // propagated to workers if (!UserGroupInformation.isSecurityEnabled()) { hoyaUsername = System.getenv(HADOOP_USER_NAME); log.info(HADOOP_USER_NAME + "='{}'", hoyaUsername); } Map<String, String> envVars; /** * It is critical this section is synchronized, to stop async AM events * arriving while registering a restarting AM. 
*/ synchronized (appState) { int heartbeatInterval = HEARTBEAT_INTERVAL; //add the RM client -this brings the callbacks in asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this); addService(asyncRMClient); //wrap it for the app state model rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient); //now bring it up runChildService(asyncRMClient); //nmclient relays callbacks back to this class nmClientAsync = new NMClientAsyncImpl("nmclient", this); runChildService(nmClientAsync); //bring up the Hoya RPC service startHoyaRPCServer(); InetSocketAddress rpcServiceAddr = rpcService.getConnectAddress(); appMasterHostname = rpcServiceAddr.getHostName(); appMasterRpcPort = rpcServiceAddr.getPort(); appMasterTrackingUrl = null; log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort); appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname); appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort); //build the role map List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles()); providerRoles.addAll(HoyaAMClientProvider.ROLES); // Start up the WebApp and track the URL for it webApp = new HoyaAMWebApp(); WebApps.$for("hoyaam", WebAppApi.class, new WebAppApiImpl(this, appState, providerService), "ws") .with(serviceConf).start(webApp); appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port(); WebAppService<HoyaAMWebApp> webAppService = new WebAppService<HoyaAMWebApp>("hoya", webApp); webAppService.init(conf); webAppService.start(); addService(webAppService); appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/"); appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port()); // Register self with ResourceManager // This will start heartbeating to the RM // address = HoyaUtils.getRmSchedulerAddress(asyncRMClient.getConfig()); log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl); 
RegisterApplicationMasterResponse response = asyncRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); Resource maxResources = response.getMaximumResourceCapability(); containerMaxMemory = maxResources.getMemory(); containerMaxCores = maxResources.getVirtualCores(); appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores()); // set the RM-defined maximum cluster values appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores)); appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory)); boolean securityEnabled = UserGroupInformation.isSecurityEnabled(); if (securityEnabled) { secretManager.setMasterKey(response.getClientToAMTokenMasterKey().array()); applicationACLs = response.getApplicationACLs(); //tell the server what the ACLs are rpcService.getServer().refreshServiceAcl(conf, new HoyaAMPolicyProvider()); } // extract container list List<Container> liveContainers = AMRestartSupport.retrieveContainersFromPreviousAttempt(response); String amRestartSupported = Boolean.toString(liveContainers != null); appInformation.put(StatusKeys.INFO_AM_RESTART_SUPPORTED, amRestartSupported); //now validate the installation Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir); providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled); //determine the location for the role history data Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME); //build the instance appState.buildInstance(instanceDefinition, providerConf, providerRoles, fs.getFileSystem(), historyDir, liveContainers, appInformation); // add the AM to the list of nodes in the cluster appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(), appMasterHostname + ":" + webApp.port()); // build up environment variables that the AM wants set in every container // irrespective of provider and role. 
envVars = new HashMap<String, String>(); if (hoyaUsername != null) { envVars.put(HADOOP_USER_NAME, hoyaUsername); } } String rolesTmpSubdir = appMasterContainerID.toString() + "/roles"; String amTmpDir = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR); Path tmpDirPath = new Path(amTmpDir); Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir); fs.getFileSystem().mkdirs(launcherTmpDirPath); //launcher service launchService = new RoleLaunchService(this, providerService, fs, new Path(getGeneratedConfDir()), envVars, launcherTmpDirPath); runChildService(launchService); appState.noteAMLaunched(); //Give the provider restricted access to the state providerService.bind(appState); // launch the provider; this is expected to trigger a callback that // brings up the service launchProviderService(instanceDefinition, confDir); try { //now block waiting to be told to exit the process waitForAMCompletionSignal(); //shutdown time } finally { finish(); } return amExitCode; }
From source file:org.apache.slider.server.appmaster.SliderAppMaster.java
License:Apache License
/** * Create and run the cluster.//from ww w .j a v a2 s. c om * @return exit code * @throws Throwable on a failure */ private int createAndRunCluster(String clustername) throws Throwable { //load the cluster description from the cd argument String sliderClusterDir = serviceArgs.getSliderClusterURI(); URI sliderClusterURI = new URI(sliderClusterDir); Path clusterDirPath = new Path(sliderClusterURI); log.info("Application defined at {}", sliderClusterURI); SliderFileSystem fs = getClusterFS(); // build up information about the running application -this // will be passed down to the cluster status MapOperations appInformation = new MapOperations(); AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath); instanceDefinition.setName(clustername); log.info("Deploying cluster {}:", instanceDefinition); stateForProviders.setApplicationName(clustername); Configuration serviceConf = getConfig(); SecurityConfiguration securityConfiguration = new SecurityConfiguration(serviceConf, instanceDefinition, clustername); // obtain security state boolean securityEnabled = securityConfiguration.isSecurityEnabled(); // set the global security flag for the instance definition instanceDefinition.getAppConfOperations().set(KEY_SECURITY_ENABLED, securityEnabled); // triggers resolution and snapshotting in agent appState.updateInstanceDefinition(instanceDefinition); File confDir = getLocalConfDir(); if (!confDir.exists() || !confDir.isDirectory()) { log.info("Conf dir {} does not exist.", confDir); File parentFile = confDir.getParentFile(); log.info("Parent dir {}:\n{}", parentFile, SliderUtils.listDir(parentFile)); } // IP filtering serviceConf.set(HADOOP_HTTP_FILTER_INITIALIZERS, AM_FILTER_NAME); //get our provider MapOperations globalInternalOptions = getGlobalInternalOptions(); String providerType = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_PROVIDER_NAME); log.info("Cluster provider type is {}", providerType); 
SliderProviderFactory factory = SliderProviderFactory.createSliderProviderFactory(providerType); providerService = factory.createServerProvider(); // init the provider BUT DO NOT START IT YET initAndAddService(providerService); providerRMOperationHandler = new ProviderNotifyingOperationHandler(providerService); // create a slider AM provider sliderAMProvider = new SliderAMProviderService(); initAndAddService(sliderAMProvider); InetSocketAddress address = SliderUtils.getRmSchedulerAddress(serviceConf); log.info("RM is at {}", address); yarnRPC = YarnRPC.create(serviceConf); /* * Extract the container ID. This is then * turned into an (incompete) container */ appMasterContainerID = ConverterUtils.toContainerId( SliderUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name())); appAttemptID = appMasterContainerID.getApplicationAttemptId(); ApplicationId appid = appAttemptID.getApplicationId(); log.info("AM for ID {}", appid.getId()); appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString()); appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString()); appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString()); Map<String, String> envVars; List<Container> liveContainers; /** * It is critical this section is synchronized, to stop async AM events * arriving while registering a restarting AM. 
*/ synchronized (appState) { int heartbeatInterval = HEARTBEAT_INTERVAL; //add the RM client -this brings the callbacks in asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this); addService(asyncRMClient); //now bring it up deployChildService(asyncRMClient); //nmclient relays callbacks back to this class nmClientAsync = new NMClientAsyncImpl("nmclient", this); deployChildService(nmClientAsync); // set up secret manager secretManager = new ClientToAMTokenSecretManager(appAttemptID, null); if (securityEnabled) { // fix up the ACLs if they are not set String acls = getConfig().get(SliderXmlConfKeys.KEY_PROTOCOL_ACL); if (acls == null) { getConfig().set(SliderXmlConfKeys.KEY_PROTOCOL_ACL, "*"); } } //bring up the Slider RPC service startSliderRPCServer(instanceDefinition); rpcServiceAddress = rpcService.getConnectAddress(); appMasterHostname = rpcServiceAddress.getHostName(); appMasterRpcPort = rpcServiceAddress.getPort(); appMasterTrackingUrl = null; log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort); appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname); appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort); log.info("Starting Yarn registry"); registryOperations = startRegistryOperationsService(); log.info(registryOperations.toString()); //build the role map List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles()); providerRoles.addAll(SliderAMClientProvider.ROLES); // Start up the WebApp and track the URL for it certificateManager = new CertificateManager(); MapOperations component = instanceDefinition.getAppConfOperations() .getComponent(SliderKeys.COMPONENT_AM); certificateManager.initialize(component); certificateManager.setPassphrase(instanceDefinition.getPassphrase()); if (component.getOptionBool(AgentKeys.KEY_AGENT_TWO_WAY_SSL_ENABLED, false)) { uploadServerCertForLocalization(clustername, fs); } startAgentWebApp(appInformation, serviceConf); int 
port = getPortToRequest(instanceDefinition); webApp = new SliderAMWebApp(registryOperations); WebApps.$for(SliderAMWebApp.BASE_PATH, WebAppApi.class, new WebAppApiImpl(this, stateForProviders, providerService, certificateManager, registryOperations), RestPaths.WS_CONTEXT).withHttpPolicy(serviceConf, HttpConfig.Policy.HTTP_ONLY).at(port) .start(webApp); String scheme = WebAppUtils.HTTP_PREFIX; appMasterTrackingUrl = scheme + appMasterHostname + ":" + webApp.port(); WebAppService<SliderAMWebApp> webAppService = new WebAppService<SliderAMWebApp>("slider", webApp); webAppService.init(serviceConf); webAppService.start(); addService(webAppService); appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/"); appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port()); // Register self with ResourceManager // This will start heartbeating to the RM // address = SliderUtils.getRmSchedulerAddress(asyncRMClient.getConfig()); log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl); amRegistrationData = asyncRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); Resource maxResources = amRegistrationData.getMaximumResourceCapability(); containerMaxMemory = maxResources.getMemory(); containerMaxCores = maxResources.getVirtualCores(); appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores()); // build the handler for RM request/release operations; this uses // the max value as part of its lookup rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient, maxResources); // set the RM-defined maximum cluster values appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores)); appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory)); // process the initial user to obtain the set of user // supplied credentials (tokens were passed in by client). 
Remove AMRM // token and HDFS delegation token, the latter because we will provide an // up to date token for container launches (getContainerCredentials()). UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); Credentials credentials = currentUser.getCredentials(); Iterator<Token<? extends TokenIdentifier>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<? extends TokenIdentifier> token = iter.next(); log.info("Token {}", token.getKind()); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME) || token.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)) { iter.remove(); } } // at this point this credentials map is probably clear, but leaving this // code to allow for future tokens... containerCredentials = credentials; if (securityEnabled) { secretManager.setMasterKey(amRegistrationData.getClientToAMTokenMasterKey().array()); applicationACLs = amRegistrationData.getApplicationACLs(); //tell the server what the ACLs are rpcService.getServer().refreshServiceAcl(serviceConf, new SliderAMPolicyProvider()); // perform keytab based login to establish kerberos authenticated // principal. Can do so now since AM registration with RM above required // tokens associated to principal String principal = securityConfiguration.getPrincipal(); File localKeytabFile = securityConfiguration.getKeytabFile(instanceDefinition); // Now log in... 
login(principal, localKeytabFile); // obtain new FS reference that should be kerberos based and different // than the previously cached reference fs = getClusterFS(); } // extract container list liveContainers = amRegistrationData.getContainersFromPreviousAttempts(); //now validate the installation Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir); providerService.initializeApplicationConfiguration(instanceDefinition, fs); providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled); //determine the location for the role history data Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME); //build the instance appState.buildInstance(instanceDefinition, serviceConf, providerConf, providerRoles, fs.getFileSystem(), historyDir, liveContainers, appInformation, new SimpleReleaseSelector()); providerService.rebuildContainerDetails(liveContainers, instanceDefinition.getName(), appState.getRolePriorityMap()); // add the AM to the list of nodes in the cluster appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(), appMasterHostname + ":" + webApp.port()); // build up environment variables that the AM wants set in every container // irrespective of provider and role. 
envVars = new HashMap<String, String>(); if (hadoop_user_name != null) { envVars.put(HADOOP_USER_NAME, hadoop_user_name); } } String rolesTmpSubdir = appMasterContainerID.toString() + "/roles"; String amTmpDir = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_AM_TMP_DIR); Path tmpDirPath = new Path(amTmpDir); Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir); fs.getFileSystem().mkdirs(launcherTmpDirPath); //launcher service launchService = new RoleLaunchService(actionQueues, providerService, fs, new Path(getGeneratedConfDir()), envVars, launcherTmpDirPath); deployChildService(launchService); appState.noteAMLaunched(); //Give the provider access to the state, and AM providerService.bind(stateForProviders, actionQueues, liveContainers); sliderAMProvider.bind(stateForProviders, actionQueues, liveContainers); // chaos monkey maybeStartMonkey(); // setup token renewal and expiry handling for long lived apps // if (SliderUtils.isHadoopClusterSecure(getConfig())) { // fsDelegationTokenManager = new FsDelegationTokenManager(actionQueues); // fsDelegationTokenManager.acquireDelegationToken(getConfig()); // } // if not a secure cluster, extract the username -it will be // propagated to workers if (!UserGroupInformation.isSecurityEnabled()) { hadoop_user_name = System.getenv(HADOOP_USER_NAME); log.info(HADOOP_USER_NAME + "='{}'", hadoop_user_name); } service_user_name = RegistryUtils.currentUser(); log.info("Registry service username ={}", service_user_name); // now do the registration registerServiceInstance(clustername, appid); // log the YARN and web UIs log.info("RM Webapp address {}", serviceConf.get(YarnConfiguration.RM_WEBAPP_ADDRESS)); log.info("slider Webapp address {}", appMasterTrackingUrl); // declare the cluster initialized log.info("Application Master Initialization Completed"); initCompleted.set(true); try { // start handling any scheduled events startQueueProcessing(); // Start the Slider AM provider sliderAMProvider.start(); // 
launch the real provider; this is expected to trigger a callback that // starts the node review process launchProviderService(instanceDefinition, confDir); //now block waiting to be told to exit the process waitForAMCompletionSignal(); } catch (Exception e) { log.error("Exception : {}", e, e); onAMStop(new ActionStopSlider(e)); } //shutdown time return finish(); }