List of usage examples for org.apache.hadoop.yarn.api.records ContainerId fromString
@Public @Stable public static ContainerId fromString(String containerIdStr)
From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java
License:Apache License
/** * Parse command line options/*from w w w . jav a 2 s. com*/ * * @param args Command line args * @return Whether init successful and run should be invoked * @throws ParseException * @throws IOException */ public boolean init(String[] args) throws ParseException, IOException { Options opts = new Options(); opts.addOption(TFApplication.OPT_TF_APP_ATTEMPT_ID, true, "App Attempt ID. Not to be used unless for testing purposes"); opts.addOption(TFApplication.OPT_TF_CONTAINER_MEMORY, true, "Amount of memory in MB to be requested to run the shell command"); opts.addOption(TFApplication.OPT_TF_CONTAINER_VCORES, true, "Amount of virtual cores to be requested to run the shell command"); opts.addOption(TFApplication.OPT_TF_PRIORITY, true, "Application Priority. Default 0"); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, true, "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, " + "2: RETRY_ON_SPECIFIC_ERROR_CODES"); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES, true, "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error " + "codes is specified with this option, " + "e.g. 
--container_retry_error_codes 1,2,3"); opts.addOption(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, true, "If container could retry, it specifies max retires"); opts.addOption(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, true, "Interval between each retry, unit is milliseconds"); opts.addOption(TFApplication.OPT_TF_SERVER_JAR, true, "Provide container jar of tensorflow"); opts.addOption(TFApplication.OPT_TF_JNI_SO, true, "jni so of tensorflow"); opts.addOption(TFApplication.OPT_TF_WORKER_NUM, true, "Provide worker server number of tensorflow"); opts.addOption(TFApplication.OPT_TF_PS_NUM, true, "Provide ps server number of tensorflow"); CommandLine cliParser = new GnuParser().parse(opts, args); if (args.length == 0) { printUsage(opts); throw new IllegalArgumentException("No args specified for application master to initialize"); } if (fileExist(log4jPath)) { try { Log4jPropertyHelper.updateLog4jConfiguration(ApplicationMaster.class, log4jPath); } catch (Exception e) { LOG.warn("Can not set up custom log4j properties. 
" + e); } } Map<String, String> envs = System.getenv(); if (!envs.containsKey(Environment.CONTAINER_ID.name())) { if (cliParser.hasOption(TFApplication.OPT_TF_APP_ATTEMPT_ID)) { String appIdStr = cliParser.getOptionValue(TFApplication.OPT_TF_APP_ATTEMPT_ID, ""); appAttemptID = ApplicationAttemptId.fromString(appIdStr); } else { throw new IllegalArgumentException("Application Attempt Id not set in the environment"); } } else { ContainerId containerId = ContainerId.fromString(envs.get(Environment.CONTAINER_ID.name())); appAttemptID = containerId.getApplicationAttemptId(); } if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) { throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment"); } if (!envs.containsKey(Environment.NM_HOST.name())) { throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment"); } if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) { throw new RuntimeException(Environment.NM_HTTP_PORT + " not set in the environment"); } if (!envs.containsKey(Environment.NM_PORT.name())) { throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment"); } LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId() + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId=" + appAttemptID.getAttemptId()); containerMemory = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_MEMORY, "256")); containerVirtualCores = Integer .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_VCORES, "1")); numTotalWokerContainers = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_WORKER_NUM, "1")); if (numTotalWokerContainers == 0) { throw new IllegalArgumentException("Cannot run tensroflow application with no worker containers"); } numTotalParamServerContainer = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_PS_NUM, "0")); numTotalContainers = 
numTotalWokerContainers + numTotalParamServerContainer; if (numTotalContainers == 0) { throw new IllegalArgumentException("Cannot run distributed shell with no containers"); } requestPriority = Integer.parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_PRIORITY, "0")); containerRetryPolicy = ContainerRetryPolicy.values()[Integer .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_POLICY, "0"))]; if (cliParser.hasOption(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES)) { containerRetryErrorCodes = new HashSet<>(); for (String errorCode : cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_ERROR_CODES) .split(",")) { containerRetryErrorCodes.add(Integer.parseInt(errorCode)); } } containerMaxRetries = Integer .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_MAX_RETRIES, "0")); containrRetryInterval = Integer .parseInt(cliParser.getOptionValue(TFApplication.OPT_TF_CONTAINER_RETRY_INTERVAL, "0")); tfServerJar = cliParser.getOptionValue(TFApplication.OPT_TF_SERVER_JAR, TFAmContainer.APPMASTER_JAR_PATH); jniSoDfsPath = cliParser.getOptionValue(TFApplication.OPT_TF_JNI_SO, ""); clusterSpec = ClusterSpec.makeClusterSpec(numTotalWokerContainers, numTotalParamServerContainer); return true; }
From source file:org.apache.oozie.action.hadoop.LauncherAM.java
License:Apache License
public LauncherAM(AMRMClientAsyncFactory amrmClientAsyncFactory, AMRMCallBackHandler amrmCallBackHandler, HdfsOperations hdfsOperations, LocalFsOperations localFsOperations, PrepareActionsHandler prepareHandler, LauncherAMCallbackNotifierFactory callbackNotifierFactory, LauncherSecurityManager launcherSecurityManager, String containerId, Configuration launcherConf) { this.amrmClientAsyncFactory = Objects.requireNonNull(amrmClientAsyncFactory, "amrmClientAsyncFactory should not be null"); this.amrmCallBackHandler = Objects.requireNonNull(amrmCallBackHandler, "amrmCallBackHandler should not be null"); this.hdfsOperations = Objects.requireNonNull(hdfsOperations, "hdfsOperations should not be null"); this.localFsOperations = Objects.requireNonNull(localFsOperations, "localFsOperations should not be null"); this.prepareHandler = Objects.requireNonNull(prepareHandler, "prepareHandler should not be null"); this.callbackNotifierFactory = Objects.requireNonNull(callbackNotifierFactory, "callbackNotifierFactory should not be null"); this.launcherSecurityManager = Objects.requireNonNull(launcherSecurityManager, "launcherSecurityManager should not be null"); this.containerId = ContainerId .fromString(Objects.requireNonNull(containerId, "containerId should not be null")); this.launcherConf = Objects.requireNonNull(launcherConf, "launcherConf should not be null"); }
From source file:org.apache.samza.webapp.YarnContainerHeartbeatServlet.java
License:Apache License
@Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { ContainerId yarnContainerId;// w w w . ja v a2 s.c o m PrintWriter printWriter = resp.getWriter(); String containerIdParam = req.getParameter(YARN_CONTAINER_ID); ContainerHeartbeatResponse response; resp.setContentType(APPLICATION_JSON); boolean alive = false; try { yarnContainerId = ContainerId.fromString(containerIdParam); for (YarnContainer yarnContainer : yarnAppState.runningProcessors.values()) { if (yarnContainer.id().compareTo(yarnContainerId) == 0) { alive = true; break; } } if (!alive) { heartbeatsExpiredCount.inc(); } response = new ContainerHeartbeatResponse(alive); printWriter.write(mapper.writeValueAsString(response)); } catch (IllegalArgumentException e) { LOG.error("Container ID {} passed is invalid", containerIdParam); resp.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage()); } }
From source file:org.apache.tez.dag.history.logging.ats.TimelineCachePluginImpl.java
License:Apache License
/**
 * Maps a Tez timeline entity to the timeline entity group it belongs to.
 *
 * <p>DAG, vertex, task and task-attempt ids resolve to the group of their
 * owning DAG (application id plus DAG id string). Container ids, optionally
 * prefixed with {@code tez_}, resolve to a group keyed by the application id
 * string.
 *
 * @param entityType the entity type name
 * @param entityId the entity id in its textual form
 * @return the group id, or {@code null} when either argument is null or
 *         empty, the type is not one of the handled types, or the id parser
 *         returns {@code null}
 */
private TimelineEntityGroupId convertToTimelineEntityGroupId(String entityType, String entityId) {
    boolean typeMissing = entityType == null || entityType.isEmpty();
    boolean idMissing = entityId == null || entityId.isEmpty();
    if (typeMissing || idMissing) {
        return null;
    }

    if (EntityTypes.TEZ_DAG_ID.name().equals(entityType)) {
        TezDAGID dagId = TezDAGID.fromString(entityId);
        if (dagId == null) {
            return null;
        }
        return TimelineEntityGroupId.newInstance(dagId.getApplicationId(), dagId.toString());
    }

    if (EntityTypes.TEZ_VERTEX_ID.name().equals(entityType)) {
        TezVertexID vertexID = TezVertexID.fromString(entityId);
        if (vertexID == null) {
            return null;
        }
        return TimelineEntityGroupId.newInstance(vertexID.getDAGId().getApplicationId(),
                vertexID.getDAGId().toString());
    }

    if (EntityTypes.TEZ_TASK_ID.name().equals(entityType)) {
        TezTaskID taskID = TezTaskID.fromString(entityId);
        if (taskID == null) {
            return null;
        }
        return TimelineEntityGroupId.newInstance(taskID.getVertexID().getDAGId().getApplicationId(),
                taskID.getVertexID().getDAGId().toString());
    }

    if (EntityTypes.TEZ_TASK_ATTEMPT_ID.name().equals(entityType)) {
        TezTaskAttemptID taskAttemptID = TezTaskAttemptID.fromString(entityId);
        if (taskAttemptID == null) {
            return null;
        }
        return TimelineEntityGroupId.newInstance(
                taskAttemptID.getTaskID().getVertexID().getDAGId().getApplicationId(),
                taskAttemptID.getTaskID().getVertexID().getDAGId().toString());
    }

    if (EntityTypes.TEZ_CONTAINER_ID.name().equals(entityType)) {
        // Strip the "tez_" prefix, when present, before parsing.
        String rawContainerId = entityId.startsWith("tez_") ? entityId.substring(4) : entityId;
        ContainerId containerId = ContainerId.fromString(rawContainerId);
        if (containerId == null) {
            return null;
        }
        return TimelineEntityGroupId.newInstance(containerId.getApplicationAttemptId().getApplicationId(),
                containerId.getApplicationAttemptId().getApplicationId().toString());
    }

    return null;
}
From source file:org.hdl.caffe.yarn.app.ApplicationMaster.java
License:Apache License
/** * Parse command line options/*from w w w . java2 s .co m*/ * * @param args Command line args * @return Whether init successful and run should be invoked * @throws ParseException * @throws IOException */ public boolean init(String[] args) throws ParseException, IOException { Options opts = new Options(); opts.addOption(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID, true, "App Attempt ID. Not to be used unless for testing purposes"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_MEMORY, true, "Amount of memory in MB to be requested to run the shell command"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_VCORES, true, "Amount of virtual cores to be requested to run the shell command"); opts.addOption(CaffeApplication.OPT_CAFFE_PRIORITY, true, "Application Priority. Default 0"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_POLICY, true, "Retry policy when container fails to run, " + "0: NEVER_RETRY, 1: RETRY_ON_ALL_ERRORS, " + "2: RETRY_ON_SPECIFIC_ERROR_CODES"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES, true, "When retry policy is set to RETRY_ON_SPECIFIC_ERROR_CODES, error " + "codes is specified with this option, " + "e.g. 
--container_retry_error_codes 1,2,3"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_MAX_RETRIES, true, "If container could retry, it specifies max retires"); opts.addOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_INTERVAL, true, "Interval between each retry, unit is milliseconds"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_JAR, true, "Provide container jar of caffe"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_NUM, true, "Provide processor number of caffe"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_SOLVER, true, "solver_configuration"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_TRAIN, true, "training_mode"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_FEATURES, true, "name_of_output_blobs"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_LABEL, true, "name of label blobs to be included in features"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_MODEL, true, "model path"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_OUTPUT, true, "output path"); opts.addOption(CaffeApplication.OPT_CAFFE_PROCESSOR_CONNECTION, true, "network mode"); CommandLine cliParser = new GnuParser().parse(opts, args); if (args.length == 0) { printUsage(opts); throw new IllegalArgumentException("No args specified for application master to initialize"); } if (fileExist(log4jPath)) { try { Log4jPropertyHelper.updateLog4jConfiguration(ApplicationMaster.class, log4jPath); } catch (Exception e) { LOG.warn("Can not set up custom log4j properties. 
" + e); } } Map<String, String> envs = System.getenv(); if (!envs.containsKey(Environment.CONTAINER_ID.name())) { if (cliParser.hasOption(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID)) { String appIdStr = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_APP_ATTEMPT_ID, ""); appAttemptID = ApplicationAttemptId.fromString(appIdStr); } else { throw new IllegalArgumentException("Application AttemptId not set in the environment"); } } else { ContainerId containerId = ContainerId.fromString(envs.get(Environment.CONTAINER_ID.name())); appAttemptID = containerId.getApplicationAttemptId(); } if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) { throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment"); } if (!envs.containsKey(Environment.NM_HOST.name())) { throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment"); } if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) { throw new RuntimeException(Environment.NM_HTTP_PORT + " not set in the environment"); } if (!envs.containsKey(Environment.NM_PORT.name())) { throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment"); } LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId() + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId=" + appAttemptID.getAttemptId()); containerMemory = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_MEMORY, "256")); containerVirtualCores = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_VCORES, "1")); numTotalProcessorContainers = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_NUM, "1")); if (numTotalProcessorContainers == 0) { throw new IllegalArgumentException("Cannot run caffe application with no containers"); } numTotalContainers = numTotalProcessorContainers; if (numTotalContainers == 0) { throw new 
IllegalArgumentException("Cannot run distributed shell with no containers"); } requestPriority = Integer.parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PRIORITY, "0")); containerRetryPolicy = ContainerRetryPolicy.values()[Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_POLICY, "0"))]; if (cliParser.hasOption(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES)) { containerRetryErrorCodes = new HashSet<>(); for (String errorCode : cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_ERROR_CODES) .split(",")) { containerRetryErrorCodes.add(Integer.parseInt(errorCode)); } } containerMaxRetries = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_MAX_RETRIES, "0")); containrRetryInterval = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_CONTAINER_RETRY_INTERVAL, "0")); caffeProcessorJar = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_JAR, CaffeAmContainer.APPMASTER_JAR_PATH); solver = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_SOLVER, ""); train = cliParser.hasOption(CaffeApplication.OPT_CAFFE_PROCESSOR_TRAIN); feature = cliParser.hasOption(CaffeApplication.OPT_CAFFE_PROCESSOR_FEATURES); label = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_LABEL, ""); model = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_MODEL, ""); output = cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_OUTPUT, ""); connection = Integer .parseInt(cliParser.getOptionValue(CaffeApplication.OPT_CAFFE_PROCESSOR_CONNECTION, "2")); clusterSpec = ClusterSpec.makeClusterSpec(numTotalProcessorContainers); return true; }
From source file:org.hdl.tensorflow.yarn.appmaster.ApplicationMaster.java
License:Apache License
private void setupPreviousRunningContainers(RegisterApplicationMasterResponse response) { String containerIdStr = System.getenv(Environment.CONTAINER_ID.name()); ContainerId containerId = ContainerId.fromString(containerIdStr); appAttemptId = containerId.getApplicationAttemptId(); List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts(); LOG.info(appAttemptId + " received " + previousAMRunningContainers.size() + " previous attempts' running containers on AM registration."); for (Container container : previousAMRunningContainers) { launchedContainers.add(container.getId()); }//from w w w .j ava 2 s. co m allocatedContainerNum.addAndGet(previousAMRunningContainers.size()); }
From source file:uk.ac.gla.terrier.probos.controller.ControllerServer.java
License:Open Source License
private TIntObjectHashMap<List<ContainerId>> getAllActiveContainers() { TIntObjectHashMap<List<ContainerId>> rtr = new TIntObjectHashMap<List<ContainerId>>(jobArray.size()); ///*from w ww . j ava2s .c o m*/ for (JobInformation ji : jobArray.valueCollection()) { List<ContainerId> containerList = new ArrayList<ContainerId>(); if (ji.masterContainerId != null && ji.masterContainerId.startsWith("container")) containerList.add(ContainerId.fromString(ji.masterContainerId)); if (ji.taskContainerId != null && ji.taskContainerId.startsWith("container")) containerList.add(ContainerId.fromString(ji.taskContainerId)); for (String arrayItem : ji.array2Container.valueCollection()) if (arrayItem != null && arrayItem.startsWith("container")) containerList.add(ContainerId.fromString(arrayItem)); if (containerList.size() > 0) rtr.put(ji.jobId, containerList); } return rtr; }
From source file:uk.ac.gla.terrier.probos.controller.ControllerServer.java
License:Open Source License
@Override public byte[] jobLog(int jobid, int arrayId, boolean stdout, long start, boolean URLonly) throws Exception { boolean masterRequest = false; if (jobid < 0) { masterRequest = true;// ww w. j av a 2 s . c o m jobid = -1 * jobid; } JobInformation ji = jobArray.get(jobid); if (ji == null) return new byte[0]; if (ji.kitten == null || ji.taskContainerId == null) return new byte[0]; String containerId = null; //either its a master request, an array request, or the main job task if (masterRequest) { containerId = ji.masterContainerId; } else if (ji.jobSpec.getArrayTaskIds() == null)//basic job { containerId = ji.taskContainerId; } else { containerId = ji.array2Container.get(arrayId); } if (containerId == null || containerId.equals("DONE") || containerId.equals("ABORTED")) { return new byte[0]; } byte[] bytes = new byte[0]; try { ContainerReport cs = yClient.getContainerReport(ContainerId.fromString(containerId)); String url = "http:" + cs.getLogUrl() + (stdout ? "/stdout" : "/stderr") + "?start=" + start; if (!URLonly) { //System.err.println(url); InputStream is = new URL(url).openStream(); bytes = IOUtils.toByteArray(is); is.close(); //convoluted process to re-obtain raw byte form //TODO: some wild assumptions about encoding here. String htmlPage = new String(bytes); htmlPage = REPLACE_PRE_PRE.matcher(htmlPage).replaceAll(""); htmlPage = REPLACE_POST_PRE.matcher(htmlPage).replaceAll(""); htmlPage.replaceAll(".*<pre>", "").replaceAll("</pre>.*", ""); htmlPage = StringEscapeUtils.unescapeHtml(htmlPage); bytes = htmlPage.getBytes(); } else { bytes = url.getBytes(); } } catch (ContainerNotFoundException ce) { LOG.warn("Too late to get job log for " + containerId); } catch (Exception e) { LOG.warn("Failed to get job log for " + containerId, e); } return bytes; }