List of usage examples for org.apache.hadoop.yarn.api.records Resource getVirtualCores
@Public @Evolving public abstract int getVirtualCores();
From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java
License:Apache License
@VisibleForTesting protected void handleStatusUpdate(RMNodeEvent event, RMContext context) { if (!(event instanceof RMNodeStatusEvent)) { logger.error("{} not an instance of {}", event.getClass().getName(), RMNodeStatusEvent.class.getName()); return;//from w w w.j av a 2 s .c om } RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; RMNode rmNode = context.getRMNodes().get(event.getNodeId()); String hostName = rmNode.getNodeID().getHost(); Node host = nodeStore.getNode(hostName); if (host != null) { host.snapshotRunningContainers(); } /* * Set the new node capacity which is the sum of the current node resources plus those offered by Mesos. * If the sum is greater than the max capacity of the node, reject the offer. */ Resource offeredResources = getNewResourcesOfferedByMesos(hostName); Resource currentResources = getResourcesUnderUse(statusEvent); if (offerWithinResourceLimits(currentResources, offeredResources)) { yarnNodeCapacityMgr.setNodeCapacity(rmNode, Resources.add(currentResources, offeredResources)); logger.info("Updated resources for {} with {} cores and {} memory", rmNode.getNode().getName(), offeredResources.getVirtualCores(), offeredResources.getMemory()); } else { logger.info("Did not update {} with {} cores and {} memory, over max cpu cores and/or max memory", rmNode.getNode().getName(), offeredResources.getVirtualCores(), offeredResources.getMemory()); } }
From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java
License:Apache License
@VisibleForTesting protected boolean offerWithinResourceLimits(Resource currentResources, Resource offeredResources) { int newMemory = currentResources.getMemory() + offeredResources.getMemory(); int newCores = currentResources.getVirtualCores() + offeredResources.getVirtualCores(); return (newMemory <= conf.getJvmMaxMemoryMB() && newCores <= conf.getMaxCpus()); }
From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java
License:Apache License
@VisibleForTesting protected Resource getNewResourcesOfferedByMesos(String hostname) { OfferFeed feed = offerLifecycleMgr.getOfferFeed(hostname); List<Offer> offers = new ArrayList<>(); Protos.Offer offer;/* w ww . ja v a 2 s. c om*/ while ((offer = feed.poll()) != null) { offers.add(offer); offerLifecycleMgr.markAsConsumed(offer); } Resource fromMesosOffers = OfferUtils.getYarnResourcesFromMesosOffers(offers); if (logger.isDebugEnabled()) { logger.debug("NM on host {} got {} CPUs and {} memory from mesos", hostname, fromMesosOffers.getVirtualCores(), fromMesosOffers.getMemory()); } return fromMesosOffers; }
From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandlerTest.java
License:Apache License
@Test public void testGetNewResourcesOfferedByMesos() throws Exception { Offer offerOne = TestObjectFactory.getOffer("localhost-one", "slave-one", "mock", "offer-one", 1.0, 512.0); Offer offerTwo = TestObjectFactory.getOffer("localhost-two", "slave-two", "mock", "offer-two", 2.0, 1024.0); olManager.addOffers(offerOne);/* ww w.j ava2 s . c o m*/ olManager.addOffers(offerTwo); Resource resourcesOne = handler.getNewResourcesOfferedByMesos("localhost-one"); assertEquals(1.0, resourcesOne.getVirtualCores(), 0.0); assertEquals(512.0, resourcesOne.getMemory(), 0.0); Resource resourcesTwo = handler.getNewResourcesOfferedByMesos("localhost-two"); assertEquals(2.0, resourcesTwo.getVirtualCores(), 0.0); assertEquals(1024.0, resourcesTwo.getMemory(), 0.0); }
From source file:org.apache.myriad.scheduler.fgs.YarnNodeCapacityManager.java
License:Apache License
private void removeYarnTask(RMContainer rmContainer) { if (containersNotNull(rmContainer)) { Protos.TaskID taskId = containerToTaskId(rmContainer); /*/* w w w . ja v a 2 s.c om*/ * Mark the task as killable within the ServerState object to flag the task * for the TaskTerminator daemon to kill the task */ state.makeTaskKillable(taskId); Node node = retrieveNode(rmContainer); if (node != null) { RMNode rmNode = node.getNode().getRMNode(); Resource resource = rmContainer.getContainer().getResource(); decrementNodeCapacity(rmNode, resource); LOGGER.info("Removed task yarn_{} with exit status freeing {} cpu and {} mem.", rmContainer.getContainer().toString(), rmContainer.getContainerExitStatus(), resource.getVirtualCores(), resource.getMemory()); } else { LOGGER.warn("The Node for the {} host was not found", rmContainer.getContainer().getNodeId().getHost()); } } }
From source file:org.apache.myriad.scheduler.fgs.YarnNodeCapacityManager.java
License:Apache License
/** * 1. Updates {@link RMNode#getTotalCapability()} with newCapacity. * 2. Sends out a {@link NodeResourceUpdateSchedulerEvent} that's handled by YARN's scheduler. * The scheduler updates the corresponding {@link SchedulerNode} with the newCapacity. * * @param rmNode//from ww w . j a v a2s. c o m * @param newCapacity */ @SuppressWarnings("unchecked") public void setNodeCapacity(RMNode rmNode, Resource newCapacity) { //NOOP prevent YARN warning changing to same size if ((Resources.equals(rmNode.getTotalCapability(), newCapacity))) { return; } if (yarnScheduler.getSchedulerNode(rmNode.getNodeID()) == null) { LOGGER.info("Yarn Scheduler doesn't have node {}, probably UNHEALTHY", rmNode.getNodeID()); return; } yarnSchedulerLock.lock(); try { if (newCapacity.getMemory() < 0 || newCapacity.getVirtualCores() < 0) { Resource zeroed = ResourceUtils.componentwiseMax(ZERO_RESOURCE, newCapacity); rmNode.getTotalCapability().setMemory(zeroed.getMemory()); rmNode.getTotalCapability().setVirtualCores(zeroed.getVirtualCores()); LOGGER.warn("Asked to set Node {} to a value less than zero! Had {}, setting to {}.", rmNode.getHttpAddress(), rmNode.getTotalCapability().toString(), zeroed.toString()); } else { rmNode.getTotalCapability().setMemory(newCapacity.getMemory()); rmNode.getTotalCapability().setVirtualCores(newCapacity.getVirtualCores()); if (LOGGER.isInfoEnabled()) { LOGGER.info("Setting capacity for node {} to {}", rmNode.getHostName(), newCapacity); } } // updates the scheduler with the new capacity for the NM. // the event is handled by the scheduler asynchronously rmContext.getDispatcher().getEventHandler() .handle(new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption .newInstance(rmNode.getTotalCapability(), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); } finally { yarnSchedulerLock.unlock(); } }
From source file:org.apache.myriad.scheduler.ResourceUtils.java
License:Apache License
public static Resource componentwiseMax(Resource lhs, Resource rhs) { int cores = Math.max(lhs.getVirtualCores(), rhs.getVirtualCores()); int mem = Math.max(lhs.getMemory(), rhs.getMemory()); return Resource.newInstance(cores, mem); }
From source file:org.apache.slider.providers.slideram.SliderAMClientProvider.java
License:Apache License
/** * Update the AM resource with any local needs * @param capability capability to update *///from www . j a va 2 s . com public void prepareAMResourceRequirements(MapOperations sliderAM, Resource capability) { capability.setMemory(sliderAM.getOptionInt(ResourceKeys.YARN_MEMORY, capability.getMemory())); capability.setVirtualCores(sliderAM.getOptionInt(ResourceKeys.YARN_CORES, capability.getVirtualCores())); }
From source file:org.apache.slider.server.appmaster.SliderAppMaster.java
License:Apache License
/** * Create and run the cluster./*from w w w. j av a 2 s .com*/ * @return exit code * @throws Throwable on a failure */ private int createAndRunCluster(String clustername) throws Throwable { //load the cluster description from the cd argument String sliderClusterDir = serviceArgs.getSliderClusterURI(); URI sliderClusterURI = new URI(sliderClusterDir); Path clusterDirPath = new Path(sliderClusterURI); log.info("Application defined at {}", sliderClusterURI); SliderFileSystem fs = getClusterFS(); // build up information about the running application -this // will be passed down to the cluster status MapOperations appInformation = new MapOperations(); AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath); instanceDefinition.setName(clustername); log.info("Deploying cluster {}:", instanceDefinition); stateForProviders.setApplicationName(clustername); Configuration serviceConf = getConfig(); SecurityConfiguration securityConfiguration = new SecurityConfiguration(serviceConf, instanceDefinition, clustername); // obtain security state boolean securityEnabled = securityConfiguration.isSecurityEnabled(); // set the global security flag for the instance definition instanceDefinition.getAppConfOperations().set(KEY_SECURITY_ENABLED, securityEnabled); // triggers resolution and snapshotting in agent appState.updateInstanceDefinition(instanceDefinition); File confDir = getLocalConfDir(); if (!confDir.exists() || !confDir.isDirectory()) { log.info("Conf dir {} does not exist.", confDir); File parentFile = confDir.getParentFile(); log.info("Parent dir {}:\n{}", parentFile, SliderUtils.listDir(parentFile)); } // IP filtering serviceConf.set(HADOOP_HTTP_FILTER_INITIALIZERS, AM_FILTER_NAME); //get our provider MapOperations globalInternalOptions = getGlobalInternalOptions(); String providerType = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_PROVIDER_NAME); log.info("Cluster provider type is {}", providerType); SliderProviderFactory factory = SliderProviderFactory.createSliderProviderFactory(providerType); providerService = factory.createServerProvider(); // init the provider BUT DO NOT START IT YET initAndAddService(providerService); providerRMOperationHandler = new ProviderNotifyingOperationHandler(providerService); // create a slider AM provider sliderAMProvider = new SliderAMProviderService(); initAndAddService(sliderAMProvider); InetSocketAddress address = SliderUtils.getRmSchedulerAddress(serviceConf); log.info("RM is at {}", address); yarnRPC = YarnRPC.create(serviceConf); /* * Extract the container ID. This is then * turned into an (incompete) container */ appMasterContainerID = ConverterUtils.toContainerId( SliderUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name())); appAttemptID = appMasterContainerID.getApplicationAttemptId(); ApplicationId appid = appAttemptID.getApplicationId(); log.info("AM for ID {}", appid.getId()); appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString()); appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString()); appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString()); Map<String, String> envVars; List<Container> liveContainers; /** * It is critical this section is synchronized, to stop async AM events * arriving while registering a restarting AM. */ synchronized (appState) { int heartbeatInterval = HEARTBEAT_INTERVAL; //add the RM client -this brings the callbacks in asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this); addService(asyncRMClient); //now bring it up deployChildService(asyncRMClient); //nmclient relays callbacks back to this class nmClientAsync = new NMClientAsyncImpl("nmclient", this); deployChildService(nmClientAsync); // set up secret manager secretManager = new ClientToAMTokenSecretManager(appAttemptID, null); if (securityEnabled) { // fix up the ACLs if they are not set String acls = getConfig().get(SliderXmlConfKeys.KEY_PROTOCOL_ACL); if (acls == null) { getConfig().set(SliderXmlConfKeys.KEY_PROTOCOL_ACL, "*"); } } //bring up the Slider RPC service startSliderRPCServer(instanceDefinition); rpcServiceAddress = rpcService.getConnectAddress(); appMasterHostname = rpcServiceAddress.getHostName(); appMasterRpcPort = rpcServiceAddress.getPort(); appMasterTrackingUrl = null; log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort); appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname); appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort); log.info("Starting Yarn registry"); registryOperations = startRegistryOperationsService(); log.info(registryOperations.toString()); //build the role map List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles()); providerRoles.addAll(SliderAMClientProvider.ROLES); // Start up the WebApp and track the URL for it certificateManager = new CertificateManager(); MapOperations component = instanceDefinition.getAppConfOperations() .getComponent(SliderKeys.COMPONENT_AM); certificateManager.initialize(component); certificateManager.setPassphrase(instanceDefinition.getPassphrase()); if (component.getOptionBool(AgentKeys.KEY_AGENT_TWO_WAY_SSL_ENABLED, false)) { uploadServerCertForLocalization(clustername, fs); } startAgentWebApp(appInformation, serviceConf); int port = getPortToRequest(instanceDefinition); webApp = new SliderAMWebApp(registryOperations); WebApps.$for(SliderAMWebApp.BASE_PATH, WebAppApi.class, new WebAppApiImpl(this, stateForProviders, providerService, certificateManager, registryOperations), RestPaths.WS_CONTEXT).withHttpPolicy(serviceConf, HttpConfig.Policy.HTTP_ONLY).at(port) .start(webApp); String scheme = WebAppUtils.HTTP_PREFIX; appMasterTrackingUrl = scheme + appMasterHostname + ":" + webApp.port(); WebAppService<SliderAMWebApp> webAppService = new WebAppService<SliderAMWebApp>("slider", webApp); webAppService.init(serviceConf); webAppService.start(); addService(webAppService); appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/"); appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port()); // Register self with ResourceManager // This will start heartbeating to the RM // address = SliderUtils.getRmSchedulerAddress(asyncRMClient.getConfig()); log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl); amRegistrationData = asyncRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); Resource maxResources = amRegistrationData.getMaximumResourceCapability(); containerMaxMemory = maxResources.getMemory(); containerMaxCores = maxResources.getVirtualCores(); appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores()); // build the handler for RM request/release operations; this uses // the max value as part of its lookup rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient, maxResources); // set the RM-defined maximum cluster values appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores)); appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory)); // process the initial user to obtain the set of user // supplied credentials (tokens were passed in by client). Remove AMRM // token and HDFS delegation token, the latter because we will provide an // up to date token for container launches (getContainerCredentials()). UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); Credentials credentials = currentUser.getCredentials(); Iterator<Token<? extends TokenIdentifier>> iter = credentials.getAllTokens().iterator(); while (iter.hasNext()) { Token<? extends TokenIdentifier> token = iter.next(); log.info("Token {}", token.getKind()); if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME) || token.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)) { iter.remove(); } } // at this point this credentials map is probably clear, but leaving this // code to allow for future tokens... containerCredentials = credentials; if (securityEnabled) { secretManager.setMasterKey(amRegistrationData.getClientToAMTokenMasterKey().array()); applicationACLs = amRegistrationData.getApplicationACLs(); //tell the server what the ACLs are rpcService.getServer().refreshServiceAcl(serviceConf, new SliderAMPolicyProvider()); // perform keytab based login to establish kerberos authenticated // principal. Can do so now since AM registration with RM above required // tokens associated to principal String principal = securityConfiguration.getPrincipal(); File localKeytabFile = securityConfiguration.getKeytabFile(instanceDefinition); // Now log in... login(principal, localKeytabFile); // obtain new FS reference that should be kerberos based and different // than the previously cached reference fs = getClusterFS(); } // extract container list liveContainers = amRegistrationData.getContainersFromPreviousAttempts(); //now validate the installation Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir); providerService.initializeApplicationConfiguration(instanceDefinition, fs); providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled); //determine the location for the role history data Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME); //build the instance appState.buildInstance(instanceDefinition, serviceConf, providerConf, providerRoles, fs.getFileSystem(), historyDir, liveContainers, appInformation, new SimpleReleaseSelector()); providerService.rebuildContainerDetails(liveContainers, instanceDefinition.getName(), appState.getRolePriorityMap()); // add the AM to the list of nodes in the cluster appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(), appMasterHostname + ":" + webApp.port()); // build up environment variables that the AM wants set in every container // irrespective of provider and role. envVars = new HashMap<String, String>(); if (hadoop_user_name != null) { envVars.put(HADOOP_USER_NAME, hadoop_user_name); } } String rolesTmpSubdir = appMasterContainerID.toString() + "/roles"; String amTmpDir = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_AM_TMP_DIR); Path tmpDirPath = new Path(amTmpDir); Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir); fs.getFileSystem().mkdirs(launcherTmpDirPath); //launcher service launchService = new RoleLaunchService(actionQueues, providerService, fs, new Path(getGeneratedConfDir()), envVars, launcherTmpDirPath); deployChildService(launchService); appState.noteAMLaunched(); //Give the provider access to the state, and AM providerService.bind(stateForProviders, actionQueues, liveContainers); sliderAMProvider.bind(stateForProviders, actionQueues, liveContainers); // chaos monkey maybeStartMonkey(); // setup token renewal and expiry handling for long lived apps // if (SliderUtils.isHadoopClusterSecure(getConfig())) { // fsDelegationTokenManager = new FsDelegationTokenManager(actionQueues); // fsDelegationTokenManager.acquireDelegationToken(getConfig()); // } // if not a secure cluster, extract the username -it will be // propagated to workers if (!UserGroupInformation.isSecurityEnabled()) { hadoop_user_name = System.getenv(HADOOP_USER_NAME); log.info(HADOOP_USER_NAME + "='{}'", hadoop_user_name); } service_user_name = RegistryUtils.currentUser(); log.info("Registry service username ={}", service_user_name); // now do the registration registerServiceInstance(clustername, appid); // log the YARN and web UIs log.info("RM Webapp address {}", serviceConf.get(YarnConfiguration.RM_WEBAPP_ADDRESS)); log.info("slider Webapp address {}", appMasterTrackingUrl); // declare the cluster initialized log.info("Application Master Initialization Completed"); initCompleted.set(true); try { // start handling any scheduled events startQueueProcessing(); // Start the Slider AM provider sliderAMProvider.start(); // launch the real provider; this is expected to trigger a callback that // starts the node review process launchProviderService(instanceDefinition, confDir); //now block waiting to be told to exit the process waitForAMCompletionSignal(); } catch (Exception e) { log.error("Exception : {}", e, e); onAMStop(new ActionStopSlider(e)); } //shutdown time return finish(); }
From source file:org.apache.slider.server.appmaster.state.AbstractClusterServices.java
License:Apache License
/** * Normalise memory, CPU and other resources according to the YARN AM-supplied * values and the resource calculator in use (currently hard-coded to the * {@link DefaultResourceCalculator}.//from w ww .j a va 2s . co m * Those resources which aren't normalized (currently: CPU) are left * as is. * @param resource resource requirements of a role * @param minR minimum values of this queue * @param maxR max values of this queue * @return a normalized value. */ public Resource normalize(Resource resource, Resource minR, Resource maxR) { Preconditions.checkArgument(resource != null, "null resource"); Preconditions.checkArgument(minR != null, "null minR"); Preconditions.checkArgument(maxR != null, "null maxR"); Resource normalize = defaultResourceCalculator.normalize(resource, minR, maxR, minR); return newResource(normalize.getMemory(), resource.getVirtualCores()); }