Example usage for org.apache.hadoop.yarn.api.records ApplicationAttemptId fromString

List of usage examples for org.apache.hadoop.yarn.api.records ApplicationAttemptId fromString

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records ApplicationAttemptId fromString.

Prototype

@Public
    @Stable
    public static ApplicationAttemptId fromString(String appAttemptIdStr) 

Source Link

Usage

From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Parse command line options// w w w.  j ava 2 s .co  m
 *
 * @param args Command line args
 * @return Whether init successful and run should be invoked
 * @throws ParseException
 * @throws IOException
 */
public boolean init(String[] args) throws ParseException, IOException {
    Options opts = new Options();
    opts.addOption(TFApplication.OPT_TF_APP_ATTEMPT_ID, true,
            "App Attempt ID. Not to be used unless for testing purposes");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_MEMORY, true,
            "Amount of memory in MB to be requested to run the shell command");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_VCORES, true,
            "Amount of virtual cores to be requested to run the shell command");
    opts.addOption(TFApplication.OPT_TF_PRIORITY, true, "Application Priority. Default 0");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, true,
            "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, "
                    + "2: RETRY_ON_SPECIFIC_ERROR_CODES");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES, true,
            "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error "
                    + "codes is specified with this option, " + "e.g. --container_retry_error_codes 1,2,3");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, true,
            "If container could retry, it specifies max retires");
    opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, true,
            "Interval between each retry, unit is milliseconds");

    opts.addOption(TFApplication.OPT_TF_SERVER_JAR, true, "Provide container jar of tensorflow");
    opts.addOption(TFApplication.OPT_TF_JNI_SO, true, "jni so of tensorflow");
    opts.addOption(TFApplication.OPT_TF_WORKER_NUM, true, "Provide worker server number of tensorflow");
    opts.addOption(TFApplication.OPT_TF_PS_NUM, true, "Provide ps server number of tensorflow");

    CommandLine cliParser = new GnuParser().parse(opts, args);

    if (args.length == 0) {
        printUsage(opts);
        throw new IllegalArgumentException("No args specified for application master to initialize");
    }

    if (fileExist(log4jPath)) {
        try {
            Log4jPropertyHelper.updateLog4jConfiguration(ApplicationMaster.class, log4jPath);
        } catch (Exception e) {
            LOG.warn("Can not set up custom log4j properties. " + e);
        }
    }

    Map<String, String> envs = System.getenv();

    if (!envs.containsKey(Environment.CONTAINER_ID.name())) {
        if (cliParser.hasOption(TFApplication.OPT_TF_APP_ATTEMPT_ID)) {
            String appIdStr = cliParser.getOptionValue(TFApplication.OPT_TF_APP_ATTEMPT_ID, "");
            appAttemptID = ApplicationAttemptId.fromString(appIdStr);
        } else {
            throw new IllegalArgumentException("Application Attempt Id not set in the environment");
        }
    } else {
        ContainerId containerId = ContainerId.fromString(envs.get(Environment.CONTAINER_ID.name()));
        appAttemptID = containerId.getApplicationAttemptId();
    }

    if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
        throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HOST.name())) {
        throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) {
        throw new RuntimeException(Environment.NM_HTTP_PORT + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_PORT.name())) {
        throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment");
    }

    LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId()
            + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId="
            + appAttemptID.getAttemptId());

    containerMemory = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_MEMORY, "256"));
    containerVirtualCores = Integer
            .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_VCORES, "1"));

    numTotalWokerContainers = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_WORKER_NUM, "1"));
    if (numTotalWokerContainers == 0) {
        throw new IllegalArgumentException("Cannot run tensroflow application with no worker containers");
    }

    numTotalParamServerContainer = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_PS_NUM, "0"));
    numTotalContainers = numTotalWokerContainers + numTotalParamServerContainer;
    if (numTotalContainers == 0) {
        throw new IllegalArgumentException("Cannot run distributed shell with no containers");
    }

    requestPriority = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_PRIORITY, "0"));

    containerRetryPolicy = ContainerRetryPolicy.values()[Integer
            .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, "0"))];
    if (cliParser.hasOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES)) {
        containerRetryErrorCodes = new HashSet<>();
        for (String errorCode : cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES)
                .split(",")) {
            containerRetryErrorCodes.add(Integer.parseInt(errorCode));
        }
    }
    containerMaxRetries = Integer
            .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, "0"));
    containrRetryInterval = Integer
            .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, "0"));

    tfServerJar = cliParser.getOptionValue(TFApplication.OPT_TF_SERVER_JAR, TFAmContainer.APPMASTER_JAR_PATH);
    jniSoDfsPath = cliParser.getOptionValue(TFApplication.OPT_TF_JNI_SO, "");

    clusterSpec = ClusterSpec.makeClusterSpec(numTotalWokerContainers, numTotalParamServerContainer);

    return true;
}

From source file:org.hdl.caffe.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Parse command line options//from   w ww.j ava2  s  .c om
 *
 * @param args Command line args
 * @return Whether init successful and run should be invoked
 * @throws ParseException
 * @throws IOException
 */
public boolean init(String[] args) throws ParseException, IOException {
    Options opts = new Options();
    opts.addOption(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID, true,
            "App Attempt ID. Not to be used unless for testing purposes");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_MEMORY, true,
            "Amount of memory in MB to be requested to run the shell command");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_VCORES, true,
            "Amount of virtual cores to be requested to run the shell command");
    opts.addOption(CaffeApplication.OPT_CAFFE_PRIORITY, true, "Application Priority. Default 0");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_POLICY, true,
            "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, "
                    + "2: RETRY_ON_SPECIFIC_ERROR_CODES");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES, true,
            "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error "
                    + "codes is specified with this option, " + "e.g. --container_retry_error_codes 1,2,3");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_MAX_RETRIES, true,
            "If container could retry, it specifies max retires");
    opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_INTERVAL, true,
            "Interval between each retry, unit is milliseconds");

    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_JAR, true, "Provide container jar of caffe");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_NUM, true, "Provide processor number of caffe");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_SOLVER, true, "solver_configuration");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_TRAIN, true, "training_mode");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_FEATURES, true, "name_of_output_blobs");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_LABEL, true,
            "name of label blobs to be included in features");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_MODEL, true, "model path");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_OUTPUT, true, "output path");
    opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_CONNECTION, true, "network mode");

    CommandLine cliParser = new GnuParser().parse(opts, args);

    if (args.length == 0) {
        printUsage(opts);
        throw new IllegalArgumentException("No args specified for application master to initialize");
    }

    if (fileExist(log4jPath)) {
        try {
            Log4jPropertyHelper.updateLog4jConfiguration(ApplicationMaster.class, log4jPath);
        } catch (Exception e) {
            LOG.warn("Can not set up custom log4j properties. " + e);
        }
    }

    Map<String, String> envs = System.getenv();

    if (!envs.containsKey(Environment.CONTAINER_ID.name())) {
        if (cliParser.hasOption(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID)) {
            String appIdStr = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID, "");
            appAttemptID = ApplicationAttemptId.fromString(appIdStr);
        } else {
            throw new IllegalArgumentException("Application AttemptId not set in the environment");
        }
    } else {
        ContainerId containerId = ContainerId.fromString(envs.get(Environment.CONTAINER_ID.name()));
        appAttemptID = containerId.getApplicationAttemptId();
    }

    if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
        throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HOST.name())) {
        throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) {
        throw new RuntimeException(Environment.NM_HTTP_PORT + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_PORT.name())) {
        throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment");
    }

    LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId()
            + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId="
            + appAttemptID.getAttemptId());

    containerMemory = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_MEMORY, "256"));
    containerVirtualCores = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_VCORES, "1"));

    numTotalProcessorContainers = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_NUM, "1"));
    if (numTotalProcessorContainers == 0) {
        throw new IllegalArgumentException("Cannot run caffe application with no containers");
    }
    numTotalContainers = numTotalProcessorContainers;
    if (numTotalContainers == 0) {
        throw new IllegalArgumentException("Cannot run distributed shell with no containers");
    }

    requestPriority = Integer.parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PRIORITY, "0"));

    containerRetryPolicy = ContainerRetryPolicy.values()[Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_POLICY, "0"))];
    if (cliParser.hasOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES)) {
        containerRetryErrorCodes = new HashSet<>();
        for (String errorCode : cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES)
                .split(",")) {
            containerRetryErrorCodes.add(Integer.parseInt(errorCode));
        }
    }
    containerMaxRetries = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_MAX_RETRIES, "0"));
    containrRetryInterval = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_INTERVAL, "0"));

    caffeProcessorJar = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_JAR,
            CaffeAmContainer.APPMASTER_JAR_PATH);

    solver = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_SOLVER, "");
    train = cliParser.hasOption(CaffeApplication.OPT_CAFFE_PROCESSOR_TRAIN);
    feature = cliParser.hasOption(CaffeApplication.OPT_CAFFE_PROCESSOR_FEATURES);
    label = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_LABEL, "");
    model = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_MODEL, "");
    output = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_OUTPUT, "");
    connection = Integer
            .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_CONNECTION, "2"));

    clusterSpec = ClusterSpec.makeClusterSpec(numTotalProcessorContainers);

    return true;
}