Example usage for org.apache.hadoop.yarn.api.records.ContainerRetryContext.newInstance

List of usage examples for org.apache.hadoop.yarn.api.records.ContainerRetryContext.newInstance

Introduction

On this page you can find example usages of the newInstance method of org.apache.hadoop.yarn.api.records.ContainerRetryContext.

Prototype

@Private
    @Unstable
    public static ContainerRetryContext newInstance(ContainerRetryPolicy retryPolicy, Set<Integer> errorCodes,
            int maxRetries, int retryInterval) 

Source Link

Usage

From source file:com.github.hdl.tensorflow.yarn.app.LaunchContainerThread.java

License:Apache License

@Override
/**/*from www  .  j a  v a 2s .  c  om*/
 * Connects to CM, sets up container launch context
 * for shell command and eventually dispatches the container
 * start request to the CM.
 */
public void run() {
    LOG.info("Setting up container launch container for containerid=" + container.getId());

    FileSystem fs = null;
    try {
        fs = FileSystem.get(appMaster.getConfiguration());
    } catch (IOException e) {
        e.printStackTrace();
    }

    TFContainer tfContainer = new TFContainer(appMaster);

    Map<String, String> env = tfContainer.setJavaEnv(appMaster.getConfiguration(), null);
    tfContainer.setNativePath(env);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    ApplicationId appId = appMaster.getAppAttempId().getApplicationId();

    try {
        tfContainer.addToLocalResources(fs, tfServerJar, TFContainer.SERVER_JAR_PATH, localResources);
        if (jniSoDfsPath != null && !jniSoDfsPath.equals("")) {
            tfContainer.addToLocalResources(fs, jniSoDfsPath, "libbridge.so", localResources);
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

    LOG.info("clusterspec: " + this.serverAddress.getClusterSpec().toString());
    //this.serverAddress.getClusterSpec().testClusterString();
    ClusterSpec cs = this.serverAddress.getClusterSpec();

    StringBuilder command = null;
    try {
        command = tfContainer.makeCommands(containerMemory, cs.getBase64EncodedJsonString(),
                this.serverAddress.getJobName(), this.serverAddress.getTaskIndex());
    } catch (JsonProcessingException e) {
        LOG.info("cluster spec cannot convert into base64 json string!");
        e.printStackTrace();
    } catch (ClusterSpecException e) {
        e.printStackTrace();
    }

    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    if (serverAddress != null) {
        LOG.info(serverAddress.getJobName() + " : " + serverAddress.getAddress() + ":"
                + serverAddress.getPort());
    }

    ContainerRetryContext containerRetryContext = ContainerRetryContext.newInstance(containerRetryPolicy,
            containerRetryErrorCodes, containerMaxRetries, containrRetryInterval);
    for (String cmd : commands) {
        LOG.info("Container " + container.getId() + " command: " + cmd.toString());
    }
    ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            appMaster.getAllTokens().duplicate(), null, containerRetryContext);
    appMaster.addContainer(container);
    appMaster.getNMClientAsync().startContainerAsync(container, ctx);
}

From source file:org.hdl.caffe.yarn.app.LaunchContainerThread.java

License:Apache License

@Override
/**/* ww  w. j a  v  a2 s .c om*/
 * Connects to CM, sets up container launch context
 * for shell command and eventually dispatches the container
 * start request to the CM.
 */
public void run() {
    LOG.info("Setting up container launch container for containerid=" + container.getId());

    FileSystem fs = null;
    try {
        fs = FileSystem.get(appMaster.getConfiguration());
    } catch (IOException e) {
        e.printStackTrace();
    }

    CaffeContainer caffeContainer = new CaffeContainer(appMaster);

    Map<String, String> env = caffeContainer.setJavaEnv(appMaster.getConfiguration(), null);
    caffeContainer.setNativePath(env);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    try {
        caffeContainer.addToLocalResources(fs, caffeProcessorJar, CaffeContainer.SERVER_JAR_PATH,
                localResources);
    } catch (IOException e) {
        e.printStackTrace();
    }

    LOG.info("cluster: " + this.serverAddress.getClusterSpec().toString());
    ClusterSpec cs = this.serverAddress.getClusterSpec();

    String command = null;
    try {
        command = caffeContainer.makeCommands(containerMemory, cs.getBase64EncodedJsonString(),
                this.serverAddress.getTaskIndex(), this.train, this.solver, this.feature, this.label,
                this.model, this.output, this.connection);
    } catch (JsonProcessingException e) {
        LOG.info("cluster spec cannot convert into base64 json string!");
        e.printStackTrace();
    } catch (ClusterSpecException e) {
        e.printStackTrace();
    }

    List<String> commands = new ArrayList<>();
    commands.add(command);
    if (serverAddress != null) {
        LOG.info(serverAddress.getAddress() + ":" + serverAddress.getPort());
    }

    ContainerRetryContext containerRetryContext = ContainerRetryContext.newInstance(containerRetryPolicy,
            containerRetryErrorCodes, containerMaxRetries, containrRetryInterval);
    for (String cmd : commands) {
        LOG.info("Container " + container.getId() + " command: " + cmd);
    }
    ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            appMaster.getAllTokens().duplicate(), null, containerRetryContext);
    appMaster.addContainer(container);
    appMaster.getNMClientAsync().startContainerAsync(container, ctx);
}