Usage examples for the org.apache.hadoop.mapred.TaskID constructor
public TaskID(String jtIdentifier, int jobId, TaskType type, int id)
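Before the examples, a minimal sketch of the constructor in isolation; the class name and all literal values here are invented for illustration:

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskIdSketch {
    public static void main(String[] args) {
        // jtIdentifier is a free-form string (historically the JobTracker start
        // time); jobId, type and id pick out the job, task kind and task index.
        TaskID taskId = new TaskID("1462203462", 1, TaskType.MAP, 3);
        System.out.println(taskId); // task_1462203462_0001_m_000003

        // A TaskID is usually wrapped in a TaskAttemptID, as the examples below do.
        System.out.println(new TaskAttemptID(taskId, 0)); // attempt_1462203462_0001_m_000003_0
    }
}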
From source file: org.apache.tez.mapreduce.input.base.MRInputBase.java
License: Apache License
public List<Event> initialize() throws IOException {
    getContext().requestInitialMemory(0l, null); // mandatory call
    MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRInputHelpers
            .parseMRInputPayload(getContext().getUserPayload());
    boolean isGrouped = mrUserPayload.getGroupingEnabled();
    Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
            "Split information not expected in " + this.getClass().getName());
    Configuration conf = TezUtils.createConfFromByteString(mrUserPayload.getConfigurationBytes());

    this.jobConf = new JobConf(conf);
    useNewApi = this.jobConf.getUseNewMapper();
    if (isGrouped) {
        if (useNewApi) {
            jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
                    org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
        } else {
            jobConf.set("mapred.input.format.class",
                    org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
        }
    }

    // Add tokens to the jobConf - in case they are accessed within the RR / IF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    TaskAttemptID taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(getContext().getApplicationId().getClusterTimestamp()),
                    getContext().getApplicationId().getId(), TaskType.MAP,
                    getContext().getTaskIndex()),
            getContext().getTaskAttemptNumber());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

    this.inputRecordCounter = getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
    return null;
}
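Note how this example derives the jtIdentifier from the YARN application's cluster timestamp and then publishes the rendered ID under MRJobConfig.TASK_ATTEMPT_ID. A minimal sketch of that round trip, with invented stand-in values for the Tez context calls:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;

public class TezStyleAttemptId {
    public static void main(String[] args) {
        // Stand-ins for getContext().getApplicationId().getClusterTimestamp(),
        // getId(), getTaskIndex() and getTaskAttemptNumber() above.
        long clusterTimestamp = 1462203462000L;
        int appId = 1, taskIndex = 3, attemptNumber = 0;

        TaskAttemptID taskAttemptId = new TaskAttemptID(
                new TaskID(Long.toString(clusterTimestamp), appId, TaskType.MAP, taskIndex),
                attemptNumber);

        JobConf jobConf = new JobConf();
        jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());

        // Downstream code (e.g. a RecordReader) can recover the full ID:
        TaskAttemptID recovered = TaskAttemptID.forName(jobConf.get(MRJobConfig.TASK_ATTEMPT_ID));
        System.out.println(recovered); // attempt_1462203462000_0001_m_000003_0
    }
}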
From source file: org.apache.tez.mapreduce.processor.MRTask.java
License: Apache License
@Override
public void initialize() throws IOException, InterruptedException {
    DeprecatedKeys.init();
    processorContext = getContext();
    counters = processorContext.getCounters();
    this.taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(processorContext.getApplicationId().getClusterTimestamp()),
                    processorContext.getApplicationId().getId(),
                    (isMap ? TaskType.MAP : TaskType.REDUCE),
                    processorContext.getTaskIndex()),
            processorContext.getTaskAttemptNumber());
    UserPayload userPayload = processorContext.getUserPayload();
    Configuration conf = TezUtils.createConfFromUserPayload(userPayload);
    if (conf instanceof JobConf) {
        this.jobConf = (JobConf) conf;
    } else {
        this.jobConf = new JobConf(conf);
    }
    jobConf.set(Constants.TEZ_RUNTIME_TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, processorContext.getDAGAttemptNumber());

    LOG.info("MRTask.inited: taskAttemptId = " + taskAttemptId.toString());

    // TODO Post MRR
    // A single file per vertex will likely be a better solution. Does not
    // require translation - client can take care of this. Will work independent
    // of whether the configuration is for intermediate tasks or not. Has the
    // overhead of localizing multiple files per job - i.e. the client would
    // need to write these files to hdfs, add them as local resources per
    // vertex. A solution like this may be more practical once it's possible to
    // submit configuration parameters to the AM and effectively tasks via RPC.
    jobConf.set(MRJobConfig.VERTEX_NAME, processorContext.getTaskVertexName());

    if (LOG.isDebugEnabled() && userPayload != null) {
        Iterator<Entry<String, String>> iter = jobConf.iterator();
        String taskIdStr = taskAttemptId.getTaskID().toString();
        while (iter.hasNext()) {
            Entry<String, String> confEntry = iter.next();
            LOG.debug("TaskConf Entry" + ", taskId=" + taskIdStr + ", key=" + confEntry.getKey()
                    + ", value=" + confEntry.getValue());
        }
    }

    configureMRTask();
}
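The pattern is the same as the previous example; the only TaskID-related difference is the (isMap ? TaskType.MAP : TaskType.REDUCE) ternary, which surfaces as the single m/r segment of the rendered ID. A small sketch with invented values:

import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class MapVsReduceId {
    public static void main(String[] args) {
        boolean isMap = false; // pretend this is a reduce task
        TaskID taskId = new TaskID("1462203462", 1,
                isMap ? TaskType.MAP : TaskType.REDUCE, 3);
        // Prints task_1462203462_0001_r_000003 - note the "r" segment
        System.out.println(taskId);
    }
}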
From source file: org.elasticsearch.hadoop.mr.HadoopCfgUtils.java
License: Apache License
private static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
    // Tez in particular uses an incorrect String (task1244XXX instead of task_1244),
    // which makes the parsing fail; this method tries to cope with such issues and
    // looks at the numbers directly if possible
    if (taskAttemptId.startsWith("task")) {
        taskAttemptId = taskAttemptId.substring(4);
    }
    if (taskAttemptId.startsWith("_")) {
        taskAttemptId = taskAttemptId.substring(1);
    }
    List<String> tokenize = StringUtils.tokenize(taskAttemptId, "_");
    // need at least 4 entries from 123123123123_0001_r_0000_4
    if (tokenize.size() < 4) {
        LogFactory.getLog(HadoopCfgUtils.class)
                .warn("Cannot parse task attempt (too little arguments) " + taskAttemptId);
        return null;
    }
    // we parse straight away - in case of an exception we can catch the new format
    try {
        return new TaskID(tokenize.get(0), Integer.parseInt(tokenize.get(1)),
                tokenize.get(2).startsWith("m"), Integer.parseInt(tokenize.get(3)));
    } catch (Exception ex) {
        LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt " + taskAttemptId);
        return null;
    }
}
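Note that this example falls back to the older TaskID(String, int, boolean, int) overload (deprecated in favor of the TaskType variant), and it exists because the stock parser is strict about the ID format. A short sketch of the failure mode it works around, using a malformed string modeled on the comment in the code above:

import org.apache.hadoop.mapred.TaskID;

public class StrictVsLenientParse {
    public static void main(String[] args) {
        // A well-formed ID parses fine with the stock parser:
        System.out.println(TaskID.forName("task_1462203462_0001_m_000003"));

        // A Tez-style string missing the underscore after "task" makes the
        // strict parser throw; the lenient helper above instead strips the
        // prefix and tokenizes the numeric parts itself.
        try {
            TaskID.forName("task1462203462_0001_m_000003");
        } catch (IllegalArgumentException e) {
            System.out.println("TaskID.forName rejected it: " + e.getMessage());
        }
    }
}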