Example usage for org.apache.hadoop.yarn.api.records Resource getVirtualCores

List of usage examples for org.apache.hadoop.yarn.api.records Resource getVirtualCores

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records Resource getVirtualCores.

Prototype

@Public
@Evolving
public abstract int getVirtualCores();

Document

Get number of virtual cpu cores of the resource.
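
Before the framework examples, here is a minimal standalone sketch of calling this method, assuming the standard Resource.newInstance(int memory, int vCores) factory from the YARN API (the class name GetVirtualCoresExample and the 2048 MB / 4 core values are arbitrary, for illustration only):

import org.apache.hadoop.yarn.api.records.Resource;

public class GetVirtualCoresExample {
    public static void main(String[] args) {
        // Build a Resource describing 2048 MB of memory and 4 virtual cores.
        Resource resource = Resource.newInstance(2048, 4);

        // getVirtualCores() returns the number of virtual CPU cores of the resource.
        int vCores = resource.getVirtualCores();
        int memoryMb = resource.getMemory();

        System.out.println("vCores=" + vCores + ", memoryMB=" + memoryMb);
    }
}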

Usage

From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java

License:Apache License

@VisibleForTesting
protected void handleStatusUpdate(RMNodeEvent event, RMContext context) {
    if (!(event instanceof RMNodeStatusEvent)) {
        logger.error("{} not an instance of {}", event.getClass().getName(), RMNodeStatusEvent.class.getName());
        return;
    }

    RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event;
    RMNode rmNode = context.getRMNodes().get(event.getNodeId());
    String hostName = rmNode.getNodeID().getHost();

    Node host = nodeStore.getNode(hostName);
    if (host != null) {
        host.snapshotRunningContainers();
    }

    /*
     * Set the new node capacity which is the sum of the current node resources plus those offered by Mesos. 
     * If the sum is greater than the max capacity of the node, reject the offer.
     */
    Resource offeredResources = getNewResourcesOfferedByMesos(hostName);
    Resource currentResources = getResourcesUnderUse(statusEvent);

    if (offerWithinResourceLimits(currentResources, offeredResources)) {
        yarnNodeCapacityMgr.setNodeCapacity(rmNode, Resources.add(currentResources, offeredResources));
        logger.info("Updated resources for {} with {} cores and {} memory", rmNode.getNode().getName(),
                offeredResources.getVirtualCores(), offeredResources.getMemory());
    } else {
        logger.info("Did not update {} with {} cores and {} memory, over max cpu cores and/or max memory",
                rmNode.getNode().getName(), offeredResources.getVirtualCores(), offeredResources.getMemory());
    }
}

From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java

License:Apache License

@VisibleForTesting
protected boolean offerWithinResourceLimits(Resource currentResources, Resource offeredResources) {
    int newMemory = currentResources.getMemory() + offeredResources.getMemory();
    int newCores = currentResources.getVirtualCores() + offeredResources.getVirtualCores();

    return (newMemory <= conf.getJvmMaxMemoryMB() && newCores <= conf.getMaxCpus());
}

From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandler.java

License:Apache License

@VisibleForTesting
protected Resource getNewResourcesOfferedByMesos(String hostname) {
    OfferFeed feed = offerLifecycleMgr.getOfferFeed(hostname);
    List<Offer> offers = new ArrayList<>();
    Protos.Offer offer;

    while ((offer = feed.poll()) != null) {
        offers.add(offer);
        offerLifecycleMgr.markAsConsumed(offer);
    }

    Resource fromMesosOffers = OfferUtils.getYarnResourcesFromMesosOffers(offers);

    if (logger.isDebugEnabled()) {
        logger.debug("NM on host {} got {} CPUs and {} memory from mesos", hostname,
                fromMesosOffers.getVirtualCores(), fromMesosOffers.getMemory());
    }

    return fromMesosOffers;
}

From source file:org.apache.myriad.scheduler.fgs.NMHeartBeatHandlerTest.java

License:Apache License

@Test
public void testGetNewResourcesOfferedByMesos() throws Exception {
    Offer offerOne = TestObjectFactory.getOffer("localhost-one", "slave-one", "mock", "offer-one", 1.0, 512.0);
    Offer offerTwo = TestObjectFactory.getOffer("localhost-two", "slave-two", "mock", "offer-two", 2.0, 1024.0);
    olManager.addOffers(offerOne);
    olManager.addOffers(offerTwo);
    Resource resourcesOne = handler.getNewResourcesOfferedByMesos("localhost-one");
    assertEquals(1.0, resourcesOne.getVirtualCores(), 0.0);
    assertEquals(512.0, resourcesOne.getMemory(), 0.0);
    Resource resourcesTwo = handler.getNewResourcesOfferedByMesos("localhost-two");
    assertEquals(2.0, resourcesTwo.getVirtualCores(), 0.0);
    assertEquals(1024.0, resourcesTwo.getMemory(), 0.0);
}

From source file:org.apache.myriad.scheduler.fgs.YarnNodeCapacityManager.java

License:Apache License

private void removeYarnTask(RMContainer rmContainer) {
    if (containersNotNull(rmContainer)) {
        Protos.TaskID taskId = containerToTaskId(rmContainer);
        /*
         * Mark the task as killable within the ServerState object so that the
         * TaskTerminator daemon can kill it.
         */
        state.makeTaskKillable(taskId);

        Node node = retrieveNode(rmContainer);
        if (node != null) {
            RMNode rmNode = node.getNode().getRMNode();
            Resource resource = rmContainer.getContainer().getResource();
            decrementNodeCapacity(rmNode, resource);
            LOGGER.info("Removed task yarn_{} with exit status freeing {} cpu and {} mem.",
                    rmContainer.getContainer().toString(), rmContainer.getContainerExitStatus(),
                    resource.getVirtualCores(), resource.getMemory());
        } else {
            LOGGER.warn("The Node for the {} host was not found",
                    rmContainer.getContainer().getNodeId().getHost());
        }
    }
}

From source file:org.apache.myriad.scheduler.fgs.YarnNodeCapacityManager.java

License:Apache License

/**
 * 1. Updates {@link RMNode#getTotalCapability()} with newCapacity.
 * 2. Sends out a {@link NodeResourceUpdateSchedulerEvent} that's handled by YARN's scheduler.
 * The scheduler updates the corresponding {@link SchedulerNode} with the newCapacity.
 *
 * @param rmNode      the node whose total capability is updated
 * @param newCapacity the new capacity to set
 */
@SuppressWarnings("unchecked")
public void setNodeCapacity(RMNode rmNode, Resource newCapacity) {
    // NOOP: return early to avoid a YARN warning about setting the capacity to the same size
    if ((Resources.equals(rmNode.getTotalCapability(), newCapacity))) {
        return;
    }
    if (yarnScheduler.getSchedulerNode(rmNode.getNodeID()) == null) {
        LOGGER.info("Yarn Scheduler doesn't have node {}, probably UNHEALTHY", rmNode.getNodeID());
        return;
    }
    yarnSchedulerLock.lock();
    try {
        if (newCapacity.getMemory() < 0 || newCapacity.getVirtualCores() < 0) {
            Resource zeroed = ResourceUtils.componentwiseMax(ZERO_RESOURCE, newCapacity);
            rmNode.getTotalCapability().setMemory(zeroed.getMemory());
            rmNode.getTotalCapability().setVirtualCores(zeroed.getVirtualCores());
            LOGGER.warn("Asked to set Node {} to a value less than zero!  Had {}, setting to {}.",
                    rmNode.getHttpAddress(), rmNode.getTotalCapability().toString(), zeroed.toString());
        } else {
            rmNode.getTotalCapability().setMemory(newCapacity.getMemory());
            rmNode.getTotalCapability().setVirtualCores(newCapacity.getVirtualCores());
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("Setting capacity for node {} to {}", rmNode.getHostName(), newCapacity);
            }
        }
        // updates the scheduler with the new capacity for the NM.
        // the event is handled by the scheduler asynchronously
        rmContext.getDispatcher().getEventHandler()
                .handle(new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption
                        .newInstance(rmNode.getTotalCapability(), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT)));
    } finally {
        yarnSchedulerLock.unlock();
    }
}

From source file:org.apache.myriad.scheduler.ResourceUtils.java

License:Apache License

public static Resource componentwiseMax(Resource lhs, Resource rhs) {
    int cores = Math.max(lhs.getVirtualCores(), rhs.getVirtualCores());
    int mem = Math.max(lhs.getMemory(), rhs.getMemory());
    return Resource.newInstance(cores, mem);
}

From source file:org.apache.slider.providers.slideram.SliderAMClientProvider.java

License:Apache License

/**
 * Update the AM resource with any local needs
 * @param capability capability to update
 */
public void prepareAMResourceRequirements(MapOperations sliderAM, Resource capability) {
    capability.setMemory(sliderAM.getOptionInt(ResourceKeys.YARN_MEMORY, capability.getMemory()));
    capability.setVirtualCores(sliderAM.getOptionInt(ResourceKeys.YARN_CORES, capability.getVirtualCores()));
}

From source file:org.apache.slider.server.appmaster.SliderAppMaster.java

License:Apache License

/**
 * Create and run the cluster.
 * @return exit code
 * @throws Throwable on a failure
 */
private int createAndRunCluster(String clustername) throws Throwable {

    //load the cluster description from the cd argument
    String sliderClusterDir = serviceArgs.getSliderClusterURI();
    URI sliderClusterURI = new URI(sliderClusterDir);
    Path clusterDirPath = new Path(sliderClusterURI);
    log.info("Application defined at {}", sliderClusterURI);
    SliderFileSystem fs = getClusterFS();

    // build up information about the running application -this
    // will be passed down to the cluster status
    MapOperations appInformation = new MapOperations();

    AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath);
    instanceDefinition.setName(clustername);

    log.info("Deploying cluster {}:", instanceDefinition);

    stateForProviders.setApplicationName(clustername);

    Configuration serviceConf = getConfig();

    SecurityConfiguration securityConfiguration = new SecurityConfiguration(serviceConf, instanceDefinition,
            clustername);
    // obtain security state
    boolean securityEnabled = securityConfiguration.isSecurityEnabled();
    // set the global security flag for the instance definition
    instanceDefinition.getAppConfOperations().set(KEY_SECURITY_ENABLED, securityEnabled);

    // triggers resolution and snapshotting in agent
    appState.updateInstanceDefinition(instanceDefinition);

    File confDir = getLocalConfDir();
    if (!confDir.exists() || !confDir.isDirectory()) {
        log.info("Conf dir {} does not exist.", confDir);
        File parentFile = confDir.getParentFile();
        log.info("Parent dir {}:\n{}", parentFile, SliderUtils.listDir(parentFile));
    }

    // IP filtering
    serviceConf.set(HADOOP_HTTP_FILTER_INITIALIZERS, AM_FILTER_NAME);

    //get our provider
    MapOperations globalInternalOptions = getGlobalInternalOptions();
    String providerType = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_PROVIDER_NAME);
    log.info("Cluster provider type is {}", providerType);
    SliderProviderFactory factory = SliderProviderFactory.createSliderProviderFactory(providerType);
    providerService = factory.createServerProvider();
    // init the provider BUT DO NOT START IT YET
    initAndAddService(providerService);
    providerRMOperationHandler = new ProviderNotifyingOperationHandler(providerService);

    // create a slider AM provider
    sliderAMProvider = new SliderAMProviderService();
    initAndAddService(sliderAMProvider);

    InetSocketAddress address = SliderUtils.getRmSchedulerAddress(serviceConf);
    log.info("RM is at {}", address);
    yarnRPC = YarnRPC.create(serviceConf);

    /*
     * Extract the container ID. This is then
     * turned into an (incomplete) container
     */
    appMasterContainerID = ConverterUtils.toContainerId(
            SliderUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name()));
    appAttemptID = appMasterContainerID.getApplicationAttemptId();

    ApplicationId appid = appAttemptID.getApplicationId();
    log.info("AM for ID {}", appid.getId());

    appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString());
    appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString());
    appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString());

    Map<String, String> envVars;
    List<Container> liveContainers;
    /**
     * It is critical this section is synchronized, to stop async AM events
     * arriving while registering a restarting AM.
     */
    synchronized (appState) {
        int heartbeatInterval = HEARTBEAT_INTERVAL;

        //add the RM client -this brings the callbacks in
        asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this);
        addService(asyncRMClient);
        //now bring it up
        deployChildService(asyncRMClient);

        //nmclient relays callbacks back to this class
        nmClientAsync = new NMClientAsyncImpl("nmclient", this);
        deployChildService(nmClientAsync);

        // set up secret manager
        secretManager = new ClientToAMTokenSecretManager(appAttemptID, null);

        if (securityEnabled) {
            // fix up the ACLs if they are not set
            String acls = getConfig().get(SliderXmlConfKeys.KEY_PROTOCOL_ACL);
            if (acls == null) {
                getConfig().set(SliderXmlConfKeys.KEY_PROTOCOL_ACL, "*");
            }
        }
        //bring up the Slider RPC service
        startSliderRPCServer(instanceDefinition);

        rpcServiceAddress = rpcService.getConnectAddress();
        appMasterHostname = rpcServiceAddress.getHostName();
        appMasterRpcPort = rpcServiceAddress.getPort();
        appMasterTrackingUrl = null;
        log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort);
        appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname);
        appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort);

        log.info("Starting Yarn registry");
        registryOperations = startRegistryOperationsService();
        log.info(registryOperations.toString());

        //build the role map
        List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles());
        providerRoles.addAll(SliderAMClientProvider.ROLES);

        // Start up the WebApp and track the URL for it
        certificateManager = new CertificateManager();
        MapOperations component = instanceDefinition.getAppConfOperations()
                .getComponent(SliderKeys.COMPONENT_AM);
        certificateManager.initialize(component);
        certificateManager.setPassphrase(instanceDefinition.getPassphrase());

        if (component.getOptionBool(AgentKeys.KEY_AGENT_TWO_WAY_SSL_ENABLED, false)) {
            uploadServerCertForLocalization(clustername, fs);
        }

        startAgentWebApp(appInformation, serviceConf);

        int port = getPortToRequest(instanceDefinition);

        webApp = new SliderAMWebApp(registryOperations);
        WebApps.$for(SliderAMWebApp.BASE_PATH, WebAppApi.class,
                new WebAppApiImpl(this, stateForProviders, providerService, certificateManager,
                        registryOperations),
                RestPaths.WS_CONTEXT).withHttpPolicy(serviceConf, HttpConfig.Policy.HTTP_ONLY).at(port)
                .start(webApp);
        String scheme = WebAppUtils.HTTP_PREFIX;
        appMasterTrackingUrl = scheme + appMasterHostname + ":" + webApp.port();
        WebAppService<SliderAMWebApp> webAppService = new WebAppService<SliderAMWebApp>("slider", webApp);

        webAppService.init(serviceConf);
        webAppService.start();
        addService(webAppService);

        appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/");
        appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port());

        // Register self with ResourceManager
        // This will start heartbeating to the RM
        // address = SliderUtils.getRmSchedulerAddress(asyncRMClient.getConfig());
        log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl);
        amRegistrationData = asyncRMClient.registerApplicationMaster(appMasterHostname, appMasterRpcPort,
                appMasterTrackingUrl);
        Resource maxResources = amRegistrationData.getMaximumResourceCapability();
        containerMaxMemory = maxResources.getMemory();
        containerMaxCores = maxResources.getVirtualCores();
        appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores());

        // build the handler for RM request/release operations; this uses
        // the max value as part of its lookup
        rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient, maxResources);

        // set the RM-defined maximum cluster values
        appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores));
        appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory));

        // process the initial user to obtain the set of user
        // supplied credentials (tokens were passed in by client). Remove AMRM
        // token and HDFS delegation token, the latter because we will provide an
        // up to date token for container launches (getContainerCredentials()).
        UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
        Credentials credentials = currentUser.getCredentials();
        Iterator<Token<? extends TokenIdentifier>> iter = credentials.getAllTokens().iterator();
        while (iter.hasNext()) {
            Token<? extends TokenIdentifier> token = iter.next();
            log.info("Token {}", token.getKind());
            if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)
                    || token.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)) {
                iter.remove();
            }
        }
        // at this point this credentials map is probably clear, but leaving this
        // code to allow for future tokens...
        containerCredentials = credentials;

        if (securityEnabled) {
            secretManager.setMasterKey(amRegistrationData.getClientToAMTokenMasterKey().array());
            applicationACLs = amRegistrationData.getApplicationACLs();

            //tell the server what the ACLs are
            rpcService.getServer().refreshServiceAcl(serviceConf, new SliderAMPolicyProvider());
            // perform keytab based login to establish kerberos authenticated
            // principal.  Can do so now since AM registration with RM above required
            // tokens associated to principal
            String principal = securityConfiguration.getPrincipal();
            File localKeytabFile = securityConfiguration.getKeytabFile(instanceDefinition);
            // Now log in...
            login(principal, localKeytabFile);
            // obtain new FS reference that should be kerberos based and different
            // than the previously cached reference
            fs = getClusterFS();
        }

        // extract container list

        liveContainers = amRegistrationData.getContainersFromPreviousAttempts();

        //now validate the installation
        Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir);

        providerService.initializeApplicationConfiguration(instanceDefinition, fs);

        providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled);

        //determine the location for the role history data
        Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME);

        //build the instance
        appState.buildInstance(instanceDefinition, serviceConf, providerConf, providerRoles, fs.getFileSystem(),
                historyDir, liveContainers, appInformation, new SimpleReleaseSelector());

        providerService.rebuildContainerDetails(liveContainers, instanceDefinition.getName(),
                appState.getRolePriorityMap());

        // add the AM to the list of nodes in the cluster

        appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(),
                appMasterHostname + ":" + webApp.port());

        // build up environment variables that the AM wants set in every container
        // irrespective of provider and role.
        envVars = new HashMap<String, String>();
        if (hadoop_user_name != null) {
            envVars.put(HADOOP_USER_NAME, hadoop_user_name);
        }
    }
    String rolesTmpSubdir = appMasterContainerID.toString() + "/roles";

    String amTmpDir = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_AM_TMP_DIR);

    Path tmpDirPath = new Path(amTmpDir);
    Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir);
    fs.getFileSystem().mkdirs(launcherTmpDirPath);

    //launcher service
    launchService = new RoleLaunchService(actionQueues, providerService, fs, new Path(getGeneratedConfDir()),
            envVars, launcherTmpDirPath);

    deployChildService(launchService);

    appState.noteAMLaunched();

    //Give the provider access to the state, and AM
    providerService.bind(stateForProviders, actionQueues, liveContainers);
    sliderAMProvider.bind(stateForProviders, actionQueues, liveContainers);

    // chaos monkey
    maybeStartMonkey();

    // setup token renewal and expiry handling for long lived apps
    //    if (SliderUtils.isHadoopClusterSecure(getConfig())) {
    //      fsDelegationTokenManager = new FsDelegationTokenManager(actionQueues);
    //      fsDelegationTokenManager.acquireDelegationToken(getConfig());
    //    }

    // if not a secure cluster, extract the username -it will be
    // propagated to workers
    if (!UserGroupInformation.isSecurityEnabled()) {
        hadoop_user_name = System.getenv(HADOOP_USER_NAME);
        log.info(HADOOP_USER_NAME + "='{}'", hadoop_user_name);
    }
    service_user_name = RegistryUtils.currentUser();
    log.info("Registry service username ={}", service_user_name);

    // now do the registration
    registerServiceInstance(clustername, appid);

    // log the YARN and web UIs
    log.info("RM Webapp address {}", serviceConf.get(YarnConfiguration.RM_WEBAPP_ADDRESS));
    log.info("slider Webapp address {}", appMasterTrackingUrl);

    // declare the cluster initialized
    log.info("Application Master Initialization Completed");
    initCompleted.set(true);

    try {
        // start handling any scheduled events

        startQueueProcessing();

        // Start the Slider AM provider
        sliderAMProvider.start();

        // launch the real provider; this is expected to trigger a callback that
        // starts the node review process
        launchProviderService(instanceDefinition, confDir);

        //now block waiting to be told to exit the process
        waitForAMCompletionSignal();
    } catch (Exception e) {
        log.error("Exception : {}", e, e);
        onAMStop(new ActionStopSlider(e));
    }
    //shutdown time
    return finish();
}

From source file:org.apache.slider.server.appmaster.state.AbstractClusterServices.java

License:Apache License

/**
 * Normalise memory, CPU and other resources according to the YARN AM-supplied
 * values and the resource calculator in use (currently hard-coded to the
 * {@link DefaultResourceCalculator}).
 * Those resources which aren't normalized (currently: CPU) are left
 * as is.
 * @param resource resource requirements of a role
 * @param minR minimum values of this queue
 * @param maxR max values of this queue
 * @return a normalized value.
 */
public Resource normalize(Resource resource, Resource minR, Resource maxR) {
    Preconditions.checkArgument(resource != null, "null resource");
    Preconditions.checkArgument(minR != null, "null minR");
    Preconditions.checkArgument(maxR != null, "null maxR");

    Resource normalize = defaultResourceCalculator.normalize(resource, minR, maxR, minR);
    return newResource(normalize.getMemory(), resource.getVirtualCores());
}