Usage examples for org.apache.hadoop.mapred.TaskAttemptID.forName
public static TaskAttemptID forName(String str) throws IllegalArgumentException
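Before the harvested examples below, a minimal sketch of the round trip: forName parses the canonical six-part attempt string, attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>, and throws IllegalArgumentException for anything malformed. The attempt string used here is the illustrative value from the Hadoop javadoc, not one taken from the sources below.

import org.apache.hadoop.mapred.TaskAttemptID;

public class ForNameDemo {
    public static void main(String[] args) {
        // A well-formed six-part attempt ID parses cleanly.
        TaskAttemptID id = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
        System.out.println(id.getJobID());          // job_200707121733_0003
        System.out.println(id.getTaskID().getId()); // 5

        // Anything that does not match the format is rejected.
        try {
            TaskAttemptID.forName("not_an_attempt_id");
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}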
From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelMap.java
License:Apache License
@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId(); // todo verify
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;
    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}
From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelMap.java
License:Apache License
@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId();
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;
    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}
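Both mappers above recover the numeric task ID from mapred.task.id so that each parallel slot can mint new vertex IDs without collisions. A minimal sketch of that pattern in isolation follows; the method name and the offset arithmetic are illustrative assumptions, not Graphalytics API.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;

public class SlotIdOffsets {
    // Hypothetical helper: derive a per-slot starting ID so parallel mappers
    // can create new vertex IDs without colliding. maxId and perSlot would
    // come from the job configuration, as in the mappers above.
    static long firstNewId(JobConf conf, long maxId, long perSlot) {
        String attempt = conf.get("mapred.task.id");
        if (attempt == null) {
            throw new IllegalStateException("mapred.task.id not set; not running inside a task?");
        }
        int taskId = TaskAttemptID.forName(attempt).getTaskID().getId();
        // slot 0 starts at maxId + 1, slot 1 at maxId + 1 + perSlot, and so on
        return maxId + 1 + taskId * perSlot;
    }
}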
From source file:org.apache.avro.mapred.tether.TetheredProcess.java
License:Apache License
private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();
    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        // we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }
    command.add(executable);

    // Add the executable arguments. The arguments are separated by newlines, so we
    // split the argument string on newlines and add each token to command. We need
    // to do it this way because TaskLog.captureOutAndError will put quote marks
    // around each argument, so if we passed a single string containing all the
    // arguments it would get quoted incorrectly.
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add the output server's port to the child's environment
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start the child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                        .replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
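The padding dance above (format a run of spaces, then replace them with zeros) exists to left-pad the task number to the six digits forName expects. A numeric format width does the same thing in one step; this is a simplified sketch, not Flink code, and buildAttemptId is a hypothetical name. The same pattern recurs in the Hive and hadoopcompatibility examples that follow.

import org.apache.hadoop.mapred.TaskAttemptID;

public class SyntheticAttemptId {
    // Hypothetical equivalent of the hand-rolled padding above: %06d left-pads
    // the task number with zeros to six digits, yielding the same synthetic
    // six-part string that forName then parses.
    static TaskAttemptID buildAttemptId(int taskNumber) {
        return TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));
    }

    public static void main(String[] args) {
        System.out.println(buildAttemptId(41)); // attempt__0000_r_000042_0
    }
}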
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        serializer = (AbstractSerDe) Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        ReflectionUtils.setConf(serializer, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe(serializer, jobConf, tblProperties, null);
        outputClass = serializer.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    }

    List<ObjectInspector> objectInspectors = new ArrayList<>();
    for (int i = 0; i < rowTypeInfo.getArity() - partitionCols.size(); i++) {
        objectInspectors.add(HiveTableUtil
                .getObjectInspector(LegacyTypeInfoDataTypeConverter.toDataType(rowTypeInfo.getTypeAt(i))));
    }
    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(rowTypeInfo.getFieldNames()), objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity();
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(rowTypeInfo.getFieldNames())
                        .subList(0, rowTypeInfo.getArity() - partitionCols.size()),
                objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity() - partitionCols.size();
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                    .replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatMapRedUtils.java
License:Apache License
/**
 * Creates a {@code TaskAttemptID} from the provided TaskAttemptContext. This
 * also performs logic to strip the crunch named output from the TaskAttemptID
 * already associated with the TaskAttemptContext. The TaskAttemptID requires
 * there to be six parts, separated by "_". With the named output the JobID
 * has 7 parts. That needs to be stripped away before a new TaskAttemptID can
 * be constructed.
 *
 * @param context
 *          The TaskAttemptContext
 * @return A TaskAttemptID with the crunch named output removed
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext context) {
    String taskAttemptId = context.getTaskAttemptID().toString();
    List<String> taskAttemptIDParts = Lists.newArrayList(taskAttemptId.split("_"));
    if (taskAttemptIDParts.size() < 7)
        return TaskAttemptID.forName(taskAttemptId);

    // index 2 is the 3rd element in the task attempt id, which will be the
    // named output
    taskAttemptIDParts.remove(2);
    String reducedTaskAttemptId = StringUtils.join(taskAttemptIDParts, "_");
    return TaskAttemptID.forName(reducedTaskAttemptId);
}
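As a concrete illustration of the stripping step, run on a raw string (the identifiers and the named output "out1" are made up for the example, and String.join stands in for the StringUtils.join used above):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.mapred.TaskAttemptID;

public class StripNamedOutputDemo {
    public static void main(String[] args) {
        // 7 parts: the 3rd element, "out1", is the crunch named output.
        String crunchId = "attempt_201403121733_out1_0003_m_000005_0";
        List<String> parts = new ArrayList<>(Arrays.asList(crunchId.split("_")));
        if (parts.size() >= 7) {
            parts.remove(2); // drop the named output, restoring the canonical six parts
        }
        TaskAttemptID id = TaskAttemptID.forName(String.join("_", parts));
        System.out.println(id); // attempt_201403121733_0003_m_000005_0
    }
}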
From source file:org.commoncrawl.hadoop.io.S3GetMetdataJob.java
License:Open Source License
public void configure(JobConf job) {
    _attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    _maxAttemptsPerTask = job.getInt("mapred.max.tracker.failures", 4);
    _splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
}
From source file:org.commoncrawl.hadoop.template.SampleHadoopJob.java
License:Open Source License
/** overloaded to initialize class variables from job config **/
@Override
public void configure(JobConf job) {
    attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    maxAttemptTaskId = job.getInt("mapred.max.tracker.failures", 4) - 1;
    splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
    pattern = Pattern.compile(job.get("mapred.mapper.regex"));
    group = job.getInt("mapred.mapper.regex.group", 0);
}
From source file:org.elasticsearch.hadoop.mr.HadoopCfgUtils.java
License:Apache License
public static TaskID getTaskID(Configuration cfg) {
    // first try with the attempt since some Hadoop versions mix the two
    String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
    if (StringUtils.hasText(taskAttemptId)) {
        try {
            return TaskAttemptID.forName(taskAttemptId).getTaskID();
        } catch (IllegalArgumentException ex) {
            // the task attempt is invalid (Tez in particular uses the wrong string - see #346)
            // try to fallback to task id
            return parseTaskIdFromTaskAttemptId(taskAttemptId);
        }
    }
    String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
    // double-check task id bug in Hadoop 2.5.x
    if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
        return TaskID.forName(taskIdProp);
    }
    return null;
}
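The fallback helper parseTaskIdFromTaskAttemptId is not shown in the snippet. A plausible sketch of what such a fallback might look like, offered as an assumption rather than the actual elasticsearch-hadoop implementation: drop the trailing attempt counter, swap the attempt prefix for a task prefix, and let TaskID.forName try again.

import org.apache.hadoop.mapred.TaskID;

public class TaskIdFallbackSketch {
    // Hypothetical fallback, NOT the real elasticsearch-hadoop code: rewrite
    // an attempt-style string into a task-style string and reparse it.
    static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
        String id = taskAttemptId;
        if (id.startsWith("attempt_")) {
            id = "task_" + id.substring("attempt_".length());
        }
        int lastSep = id.lastIndexOf('_');
        if (lastSep > 0) {
            id = id.substring(0, lastSep); // drop the attempt counter
        }
        try {
            return TaskID.forName(id);
        } catch (IllegalArgumentException ex) {
            return null; // still malformed; give up, as the caller ultimately does
        }
    }
}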