Example usage for org.apache.hadoop.yarn.api.protocolrecords RegisterApplicationMasterResponse getContainersFromPreviousAttempts

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.protocolrecords RegisterApplicationMasterResponse getContainersFromPreviousAttempts.

Prototype

@Public
@Unstable
public abstract List<Container> getContainersFromPreviousAttempts();

Source Link

Document

Get the list of running containers as viewed by ResourceManager from previous application attempts.

Usage

From source file:ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*from  ww  w.j a  v  a2  s  .c  o  m*/
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }
}

From source file:cn.edu.buaa.act.petuumOnYarn.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException//from  w ww  .  j a v a 2  s.  co m
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer
    // class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from
    // clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers;
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);

}

From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException//from w  w w .  java  2 s  .co m
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.AbstractCallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    appMasterHostname = System.getenv(Environment.NM_HOST.name());
    TFApplicationRpcServer rpcServer = new TFApplicationRpcServer(appMasterHostname, new RpcForClient());
    appMasterRpcPort = rpcServer.getRpcPort();
    rpcServer.startRpcServiceThread();

    // Register self with ResourceManager
    // This will start heartbeating to the RM

    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    long maxMem = response.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    for (Container container : previousAMRunningContainers) {
        launchedContainers.add(container.getId());
    }
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);

}

From source file:com.inforefiner.hdata.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*from   w w  w  .  j  av a 2s . co m*/
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    startTimelineClient(conf);
    if (timelineClient != null) {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START,
                domainId, appSubmitterUgi);
    }

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
}

From source file:com.scistor.dshell.ScistorApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 * //from   w w  w  . j a v a  2 s. c o m
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from
    // clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info("Received " + previousAMRunningContainers.size()
            + " previous AM's running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainersToRequest);
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }
}

From source file:com.sogou.dockeronyarn.service.DockerApplicationMaster_24.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*from   w  w  w  . j  a  v  a  2 s  . com*/
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }
    numRetryCount.set(0);
}

From source file:io.hops.tensorflow.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/* w w  w .  j av a  2  s  . c  o m*/
 * @throws IOException
 */
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster. " + "Workers: " + numWorkers + ", Parameter servers: " + numPses);

    clusterSpecServer = new ClusterSpecGeneratorServer(appAttemptID.getApplicationId().toString(),
            numTotalContainers, numWorkers);
    LOG.info("Starting ClusterSpecGeneratorServer");
    int port = 2222;
    while (true) {
        try {
            clusterSpecServer.start(port);
            break;
        } catch (IOException e) {
            port++;
        }
    }
    environment.put("YARNTF_AM_ADDRESS", InetAddress.getLocalHost().getHostName() + ":" + port);
    environment.put("YARNTF_APPLICATION_ID", appAttemptID.getApplicationId().toString());

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    rmWrapper = new RMWrapper(this);
    rmWrapper.getClient().init(conf);
    rmWrapper.getClient().start();

    nmWrapper = new NMWrapper(this);
    nmWrapper.getClient().init(conf);
    nmWrapper.getClient().start();

    timelineHandler = new TimelineHandler(appAttemptID.toString(), domainId, appSubmitterUgi);
    timelineHandler.startClient(conf);
    if (timelineHandler.isClientNotNull()) {
        timelineHandler.publishApplicationAttemptEvent(YarntfEvent.YARNTF_APP_ATTEMPT_START);
    }

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    appMasterTrackingUrl = InetAddress.getLocalHost().getHostName() + ":" + TensorBoardServer.spawn(this);
    RegisterApplicationMasterResponse response = rmWrapper.getClient()
            .registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    int maxGPUS = response.getMaximumResourceCapability().getGPUs();
    LOG.info("Max gpus capabililty of resources in this cluster " + maxGPUS);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    if (containerGPUs > maxGPUS) {
        LOG.info("Container gpus specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerGPUs + ", max=" + maxGPUS);
        containerGPUs = maxGPUS;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    // Stop eventual containers from previous attempts
    for (Container prevContainer : previousAMRunningContainers) {
        LOG.info("Releasing YARN container " + prevContainer.getId());
        rmWrapper.getClient().releaseAssignedContainer(prevContainer.getId());
    }

    // Send request for containers to RM
    for (int i = 0; i < numWorkers; i++) {
        ContainerRequest workerRequest = setupContainerAskForRM(true);
        rmWrapper.getClient().addContainerRequest(workerRequest);
    }
    numRequestedContainers.addAndGet(numWorkers);
    for (int i = 0; i < numPses; i++) {
        ContainerRequest psRequest = setupContainerAskForRM(false);
        rmWrapper.getClient().addContainerRequest(psRequest);
    }
    numRequestedContainers.addAndGet(numPses);
}

From source file:me.haosdent.noya.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws org.apache.hadoop.yarn.exceptions.YarnException
 * @throws java.io.IOException//from  w  w  w .  j av  a2  s  .  c  o m
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info("Received " + previousAMRunningContainers.size()
            + " previous AM's running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainersToRequest);
    try {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END);
    } catch (Exception e) {
        LOG.error("App Attempt start event coud not be pulished for " + appAttemptID.toString(), e);
    }
}

From source file:org.apache.hama.bsp.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 * //from  w w  w  . ja v a2  s. c o  m
 * @throws org.apache.hadoop.yarn.exceptions.YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(localConf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(localConf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    for (Container container : previousAMRunningContainers) {
        launchedContainers.add(container.getId());
    }
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        AMRMClient.ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
}

From source file:org.hdl.caffe.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException/*w ww .ja  va 2  s  .  co  m*/
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.AbstractCallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    appMasterHostname = System.getenv(Environment.NM_HOST.name());
    CaffeApplicationRpcServer rpcServer = new CaffeApplicationRpcServer(appMasterHostname, new RpcForClient());
    appMasterRpcPort = rpcServer.getRpcPort();
    rpcServer.startRpcServiceThread();

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the
    // resource manager
    long maxMem = response.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    for (Container container : previousAMRunningContainers) {
        launchedContainers.add(container.getId());
    }
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();

    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);

}