Example usage for org.apache.hadoop.mapred TaskAttemptID toString

List of usage examples for org.apache.hadoop.mapred TaskAttemptID toString


On this page you can find example usages of the toString() method of org.apache.hadoop.mapred.TaskAttemptID.


    public String toString() 

Source Link


From source file:cascading.flow.tez.util.TezUtil.java

License:Open Source License

/**
 * Stores task-identifying values derived from the Tez {@code context} in {@code config}
 * under the {@code JobContext} task keys.
 *
 * NOTE(review): this excerpt looks truncated. The initializer of {@code taskAttemptId} has no
 * method call between the class name and the argument list, and the closing brace of the
 * method is not shown. Confirm against the original file.
 *
 * @param context supplies the vertex index, application id, task index and attempt number
 * @param config receives the task attempt id, task id, is-map flag and partition
 * @param isMapperOutput written to {@code JobContext.TASK_ISMAP} and also passed along when
 *        the attempt id is built
 */
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    // Attempt id and its enclosing task id are stored in their string forms.
    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    // The partition is the numeric id of the task.
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());

From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java

License:Apache License

/**
 * Configures this mapper from the job configuration: looks for the UIMA PEAR archive among
 * the distributed-cache files, installs it into a directory named after the current task
 * attempt, creates the analysis engine and its CAS, and reads the feature and annotation-type
 * filters.
 *
 * NOTE(review): this excerpt is truncated. Several closing braces and the bodies of some
 * {@code if} statements are missing, so the control flow shown here is not necessarily the
 * control flow of the original file.
 *
 * @param conf job configuration; read for "uima.store.short.names", "uima.pear.path",
 *        "mapred.task.id", "uima.features.filter" and "uima.annotations.filter"
 */
public void configure(JobConf conf) {

    this.config = conf;

    storeshortnames = config.getBoolean("uima.store.short.names", true);

    // Only the file name of the configured pear path is used to match cache entries.
    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();

    URL urlPEAR = null;

    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            // NOTE(review): the statement guarded by this `if` seems to be missing from the
            // excerpt (presumably a `continue`). As shown, the assignment below would only run
            // for paths that do NOT end with the pear name - confirm against the original.
            if (!localPath.endsWith(pearname))
            urlPEAR = new URL("file://" + localPath);
    } catch (IOException e) {
        // NOTE(review): the message says "gate application" although this method handles a
        // UIMA pear; looks like a copy/paste leftover.
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);

    // No matching archive was found in the distributed cache.
    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");

    File pearFile = new File(urlPEAR.getPath());

    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    // The install directory sits next to the pear file and is named after the task attempt.
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);

    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();

    // Create analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());

        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);

        cas = tae.newCAS();
    } catch (Exception e) {
        // Any failure while building the engine is rethrown unchecked with its cause.
        throw new RuntimeException(e);

    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    // the featurefilters have the following form : Type:featureName
    // we group them by annotation type
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        // NOTE(review): the body of this `if` is missing from the excerpt (presumably it
        // skips entries that are not of the form Type:featureName).
        if (fp.length != 2)
        // Features are collected per annotation type name (fp[0]).
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        // NOTE(review): the body of this `if` is missing from the excerpt.
        if (f != null)

    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);

    // Each configured annotation type name is looked up in the CAS type system.
    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);


From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java

License:Apache License

/**
 * Prepares this adapter for writing: creates a fresh {@code JobConf}, assigns a synthetic
 * job id and task attempt id, builds job and task contexts for them, obtains the output
 * committer, and configures the output format when it is {@code JobConfigurable}.
 *
 * NOTE(review): the excerpt ends before the method's closing brace; further steps may follow
 * in the original file.
 *
 * @throws Exception declared by the signature; the throwing calls cannot be identified from
 *         this excerpt
 */
public void open() throws Exception {
    this.conf = new JobConf();
    this.reporter = Reporter.NULL;

    // Some OutputFormats (like FileOutputFormat) require that the job id/task id set.
    // So let's set it for all output formats, just in case they need it too.
    // Each call takes the next value of jobCounter as the job number.
    JobID jobid = new JobID("sequential", jobCounter.getAndIncrement());
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 0), 0);
    conf.set("mapred.task.id", taskid.toString());

    setSequential(conf);

    // Create a task so we can use committers.
    sequentialJob = new ExposeJobContext(conf, jobid);
    sequentialTask = new ExposeTaskAttemptContext(conf, taskid);

    // Give the committer a chance to initialize.
    OutputCommitter committer = conf.getOutputCommitter();
    // FIXME: We skip job setup for now because
    // (NOTE(review): the rest of this FIXME is missing from the excerpt)

    if (oFormat instanceof JobConfigurable)
        ((JobConfigurable) oFormat).configure(conf);

From source file:com.mellanox.hadoop.mapred.UdaPlugin.java

License:Apache License

/**
 * Creates the reduce-task side of the UDA plugin: works out how much memory the RDMA shuffle
 * may use, logs the chosen settings, launches the native side, and assembles the parameter
 * list for the INIT command.
 *
 * NOTE(review): this excerpt is truncated. Several closing braces, the second argument of
 * the {@code getFloat} call and the bodies of the directory loops are missing.
 *
 * @param udaShuffleConsumer stored in {@code this.udaShuffleConsumer}
 * @param reduceTask stored in {@code this.reduceTask}; its task id is read before the INIT
 *        parameters are built
 * @param jobConf source of the RDMA, compression and local-directory settings
 * @param reporter stored in {@code this.mTaskReporter}
 * @param numMaps number of segments to fetch; stored in {@code this.mMapsNeed}
 * @throws IOException declared by the signature; the throwing call cannot be identified from
 *         this excerpt
 */
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    // Buffer sizes are logged and converted below as KB values.
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    // Human-readable summary of the memory settings, built up as values become known.
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    // NOTE(review): the previous line already appended "KB", so this yields "KBKB, ..." in
    // the summary text.
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    // A negative value is only warned about; it is then handled like 0 by the check below.
    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        // No explicit total size: derive the limit from the JVM max heap and the configured
        // shuffle input buffer percentage.
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        // NOTE(review): the default-value argument of this getFloat call is missing from the
        // excerpt.
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
        // Values outside [0, 1] are replaced by the default percentage.
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        // An explicit total size was configured and is used as the limit.
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");

    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes . passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment

    boolean compression = jobConf.getCompressMapOutput(); //"true" or "false"
    // The codec name is only looked up when map output compression is enabled.
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);

    // Default buffer size of 256 * 1024; overridden by the codec-specific setting for LZO
    // or Snappy when one of those codecs is configured.
    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            // NOTE(review): the statement that records the directory, and the catch body,
            // are missing from the excerpt.
        } catch (DiskErrorException e) {
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    // NOTE(review): the body of this loop is missing from the excerpt.
    for (int i = 0; i < numDirs; i++) {

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    this.mProgress = new Progress();

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License:Apache License

/**
 * Writes the identifiers of a reduce task attempt into {@code jobConf}, once under the
 * literal "mapred.*" keys and once under the constants {@code TASK_ID},
 * {@code TASK_ATTEMPT_ID}, {@code TASK_ISMAP}, {@code TASK_PARTITION} and {@code ID}.
 * The is-map flag is always written as {@code false}.
 *
 * NOTE(review): the closing brace of the method is not part of this excerpt.
 *
 * @param jobConf configuration to update in place
 * @param taskAttemptID attempt whose attempt id, task id and job id are stored as strings
 * @param partition value stored as the task partition
 */
static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {

    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    //licensed under Apache License, Version 2.0

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    //licensed under Apache License, Version 2.0
    // Same five values as above, stored under the constant-named keys.
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());

From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java

License:Apache License

/**
 * Builds the temporary output path of a task attempt below the job's configured output path.
 * The child path is {@code FileOutputCommitter.TEMP_DIR_NAME}, {@code Path.SEPARATOR}, and
 * "_" followed by the attempt id string.
 *
 * NOTE(review): closing braces are missing from this excerpt.
 *
 * @param conf job configuration holding the output path; also used to obtain the file system
 * @param taskAttemptID attempt whose string form names the directory
 * @return the path qualified against its file system; the unqualified path if obtaining the
 *         file system throws an {@code IOException}; {@code null} if no output path is set
 */
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            // Best effort: fall back to the unqualified path.
            return p;
    return null;

From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java

License:Apache License

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
public void open(int taskNumber, int numTasks) throws IOException {
    // Builds a synthetic reduce task attempt id from taskNumber + 1, stores it in the job
    // configuration, creates the task attempt and job contexts, and obtains the record
    // writer from the wrapped mapred output format.
    // NOTE(review): this excerpt is truncated (closing braces and possibly statements are
    // missing); numTasks is not used in the part that is visible.

    // The numeric part of the attempt id is limited to 6 digits.
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");

    // The format/replace pair produces the run of zeros that left-pads taskNumber + 1 to
    // 6 digits.
    // NOTE(review): for a number with exactly 6 digits the computed width is 0, i.e. the
    // format string is "%0s", which java.util.Formatter appears to reject with an
    // exception - confirm.
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);


    // The writer is given taskNumber + 1 as its name argument.
    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());

From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java

License:Apache License

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
public void open(int taskNumber, int numTasks) throws IOException {
    // Builds a synthetic reduce task attempt id from taskNumber + 1, stores the id and the
    // partition in the job configuration under both the "mapred.*" and "mapreduce.*" keys,
    // creates the task attempt and job contexts, and obtains the record writer.
    // NOTE(review): this excerpt is truncated (closing braces and possibly statements are
    // missing); numTasks is not used in the part that is visible.

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        // The numeric part of the attempt id is limited to 6 digits.
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");

        // The format/replace pair produces the run of zeros that left-pads taskNumber + 1
        // to 6 digits.
        // NOTE(review): for a number with exactly 6 digits the computed width is 0, i.e.
        // the format string is "%0s", which java.util.Formatter appears to reject with an
        // exception - confirm.
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String
                .format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);

        // The committer comes from the job configuration.
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);


        // The writer is given taskNumber + 1 as its name argument.
        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());

From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License:Apache License

/**
 * Opens this output format for one parallel instance: instantiates and initializes the Hive
 * serializer, stores a synthetic reduce task attempt id and the partition in the job
 * configuration, creates the task attempt context, and sets up the row object inspector.
 *
 * NOTE(review): this excerpt is truncated. Closing braces, the body of the object-inspector
 * loop and the last argument of the second {@code getStandardStructObjectInspector} call are
 * missing. {@code numTasks} is not used in the part that is visible.
 *
 * @param taskNumber number used as the task id and as the partition value
 * @param numTasks not referenced in the visible code
 * @throws IOException declared by the signature; the throwing call cannot be identified from
 *         this excerpt
 */
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        // The SerDe class named by the storage descriptor is instantiated reflectively.
        serializer = (AbstractSerDe) Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        ReflectionUtils.setConf(serializer, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe(serializer, jobConf, tblProperties, null);
        outputClass = serializer.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);

    // The format/replace pair produces the run of zeros that left-pads taskNumber to 6 digits.
    // NOTE(review): no check on the digit count of taskNumber is visible before the width
    // (6 - digits) is computed; a width of 0 or less may not be a valid format - confirm.
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    // Without dynamic partitioning a single writer for the partition's location is created
    // here.
    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());

    List<ObjectInspector> objectInspectors = new ArrayList<>();
    // Iterates over the non-partition columns.
    // NOTE(review): the loop body is missing from the excerpt.
    for (int i = 0; i < rowTypeInfo.getArity() - partitionCols.size(); i++) {

    if (!isPartitioned) {
        // All fields of the row type are regular columns.
        rowObjectInspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(rowTypeInfo.getFieldNames()), objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity();
    } else {
        // Only the leading (arity - partition column count) fields are used for the struct
        // inspector.
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Arrays
                .asList(rowTypeInfo.getFieldNames()).subList(0, rowTypeInfo.getArity() - partitionCols.size()),
        numNonPartitionCols = rowTypeInfo.getArity() - partitionCols.size();

From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java

License:Apache License

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
public void open(int taskNumber, int numTasks) throws IOException {
    // Builds a synthetic reduce task attempt id from taskNumber + 1, stores the id and the
    // partition in the job configuration under both the "mapred.*" and "mapreduce.*" keys,
    // creates the task attempt and job contexts, and obtains the record writer.
    // NOTE(review): this excerpt is truncated (closing braces and possibly statements are
    // missing); numTasks is not used in the part that is visible.

    // The numeric part of the attempt id is limited to 6 digits.
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");

    // The format/replace pair produces the run of zeros that left-pads taskNumber + 1 to
    // 6 digits.
    // NOTE(review): for a number with exactly 6 digits the computed width is 0, i.e. the
    // format string is "%0s", which java.util.Formatter appears to reject with an
    // exception - confirm.
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);


    // The writer is given taskNumber + 1 as its name argument.
    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());