Example usage for org.apache.hadoop.mapred TaskID TaskID

Introduction

This page lists usage examples for the org.apache.hadoop.mapred.TaskID constructor shown below.

Prototype

public TaskID(String jtIdentifier, int jobId, TaskType type, int id) 

Document

Constructs a TaskInProgressId object from given parts.
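
To illustrate the prototype above, here is a minimal sketch with made-up identifier values; TaskID#toString pads the job number to four digits and the task number to six:

import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskIDDemo {
    public static void main(String[] args) {
        // jtIdentifier is typically the cluster timestamp of the owning
        // application; all values here are made up for illustration.
        TaskID taskId = new TaskID("1407890000000", 1, TaskType.MAP, 2);
        System.out.println(taskId);               // task_1407890000000_0001_m_000002
        System.out.println(taskId.getTaskType()); // MAP
        System.out.println(taskId.getJobID());    // job_1407890000000_0001
    }
}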

Usage

From source file:org.apache.tez.mapreduce.input.base.MRInputBase.java

License:Apache License

public List<Event> initialize() throws IOException {
    getContext().requestInitialMemory(0L, null); // mandatory call
    MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRInputHelpers
            .parseMRInputPayload(getContext().getUserPayload());
    boolean isGrouped = mrUserPayload.getGroupingEnabled();
    Preconditions.checkArgument(!mrUserPayload.hasSplits(),
            "Split information not expected in " + this.getClass().getName());
    Configuration conf = TezUtils.createConfFromByteString(mrUserPayload.getConfigurationBytes());
    this.jobConf = new JobConf(conf);
    useNewApi = this.jobConf.getUseNewMapper();
    if (isGrouped) {
        if (useNewApi) {
            jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
                    org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
        } else {
            jobConf.set("mapred.input.format.class",
                    org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
        }
    }

    // Add tokens to the jobConf - in case they are accessed within the RR / IF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    TaskAttemptID taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(getContext().getApplicationId().getClusterTimestamp()),
                    getContext().getApplicationId().getId(), TaskType.MAP, getContext().getTaskIndex()),
            getContext().getTaskAttemptNumber());

    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

    this.inputRecordCounter = getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

    return null;
}
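
Note how this example derives each constructor argument from the Tez context: the ApplicationId's cluster timestamp serves as the jtIdentifier, the application id as the job number, and the task index as the task number, yielding a legal MapReduce attempt id. A minimal sketch of the resulting string, with made-up values standing in for the context lookups (imports as in the demo above, plus org.apache.hadoop.mapred.TaskAttemptID):

TaskAttemptID attemptId = new TaskAttemptID(
        new TaskID("1407890000000", 1, TaskType.MAP, 2), 0);
System.out.println(attemptId); // attempt_1407890000000_0001_m_000002_0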

From source file:org.apache.tez.mapreduce.processor.MRTask.java

License:Apache License

@Override
public void initialize() throws IOException, InterruptedException {

    DeprecatedKeys.init();

    processorContext = getContext();
    counters = processorContext.getCounters();
    this.taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(processorContext.getApplicationId().getClusterTimestamp()),
                    processorContext.getApplicationId().getId(), (isMap ? TaskType.MAP : TaskType.REDUCE),
                    processorContext.getTaskIndex()),
            processorContext.getTaskAttemptNumber());

    UserPayload userPayload = processorContext.getUserPayload();
    Configuration conf = TezUtils.createConfFromUserPayload(userPayload);
    if (conf instanceof JobConf) {
        this.jobConf = (JobConf) conf;
    } else {
        this.jobConf = new JobConf(conf);
    }
    jobConf.set(Constants.TEZ_RUNTIME_TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, processorContext.getDAGAttemptNumber());

    LOG.info("MRTask.inited: taskAttemptId = " + taskAttemptId.toString());

    // TODO Post MRR
    // A single file per vertex will likely be a better solution. Does not
    // require translation - client can take care of this. Will work independent
    // of whether the configuration is for intermediate tasks or not. Has the
    // overhead of localizing multiple files per job - i.e. the client would
    // need to write these files to hdfs, add them as local resources per
    // vertex. A solution like this may be more practical once it's possible to
    // submit configuration parameters to the AM and effectively tasks via RPC.

    jobConf.set(MRJobConfig.VERTEX_NAME, processorContext.getTaskVertexName());

    if (LOG.isDebugEnabled() && userPayload != null) {
        Iterator<Entry<String, String>> iter = jobConf.iterator();
        String taskIdStr = taskAttemptId.getTaskID().toString();
        while (iter.hasNext()) {
            Entry<String, String> confEntry = iter.next();
            LOG.debug("TaskConf Entry" + ", taskId=" + taskIdStr + ", key=" + confEntry.getKey() + ", value="
                    + confEntry.getValue());
        }
    }

    configureMRTask();
}

From source file:org.elasticsearch.hadoop.mr.HadoopCfgUtils.java

License:Apache License

private static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
    // Tez in particular uses an incorrect string such as task1244XXX instead of task_1244, which makes the parsing fail;
    // this method tries to cope with such issues by looking at the numeric parts where possible
    if (taskAttemptId.startsWith("task")) {
        taskAttemptId = taskAttemptId.substring(4);
    }
    if (taskAttemptId.startsWith("_")) {
        taskAttemptId = taskAttemptId.substring(1);
    }
    List<String> tokenize = StringUtils.tokenize(taskAttemptId, "_");
    // need at least 4 tokens, as in 123123123123_0001_r_0000_4
    if (tokenize.size() < 4) {
        LogFactory.getLog(HadoopCfgUtils.class)
                .warn("Cannot parse task attempt (too little arguments) " + taskAttemptId);
        return null;
    }
    // parse straight away - if the id uses an unexpected format, the catch below handles it
    try {
        return new TaskID(tokenize.get(0), Integer.parseInt(tokenize.get(1)),
                tokenize.get(2).startsWith("m") ? TaskType.MAP : TaskType.REDUCE,
                Integer.parseInt(tokenize.get(3)));
    } catch (Exception ex) {
        LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt " + taskAttemptId);
        return null;
    }
}
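
For well-formed ids the manual tokenizing above is unnecessary: Hadoop's TaskID.forName parses the canonical task_<jt>_<job>_<m|r>_<id> form directly. A short sketch of the contrast, using made-up ids:

// Canonical form - parses directly.
TaskID parsed = TaskID.forName("task_1407890000000_0001_m_000002");

// A Tez-style id missing the underscore ("task1407890000000_0001_m_000002")
// makes forName throw IllegalArgumentException, which is why the helper above
// strips the "task" prefix itself and tokenizes the remaining numeric parts.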