List of usage examples for org.apache.hadoop.mapred.TaskAttemptID.toString()
@Override
public String toString()
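TaskAttemptID.toString() renders a task attempt id in the form attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>, for example attempt_200707121733_0003_m_000005_0, and TaskAttemptID.forName(String) parses that form back into a TaskAttemptID. A minimal sketch of the round trip (the job identifier and task numbers below are made-up example values):

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

public class TaskAttemptIdToStringExample {
    public static void main(String[] args) {
        // Build an attempt id by hand: job "200707121733_0003", map task 5, attempt 0.
        JobID jobId = new JobID("200707121733", 3);
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, true, 5), 0);

        // Prints: attempt_200707121733_0003_m_000005_0
        System.out.println(attemptId.toString());

        // forName(...) parses the same string form back into an equal TaskAttemptID.
        TaskAttemptID parsed = TaskAttemptID.forName(attemptId.toString());
        System.out.println(parsed.equals(attemptId)); // true
    }
}

Several of the examples below rely on exactly this round trip: they either call toString() on a real attempt id or build a synthetic attempt string, then store it in the job configuration under mapred.task.id / mapreduce.task.attempt.id so that output formats and committers see a well-formed attempt id.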
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}
From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java
License:Apache License
public void configure(JobConf conf) {
    this.config = conf;
    storeshortnames = config.getBoolean("uima.store.short.names", true);

    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();

    URL urlPEAR = null;

    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            if (!localPath.endsWith(pearname))
                continue;
            urlPEAR = new URL("file://" + localPath);
            break;
        }
    } catch (IOException e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }

    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");

    File pearFile = new File(urlPEAR.getPath());

    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);

    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();

    // Create analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());
        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);
        cas = tae.newCAS();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    // the feature filters have the following form : Type:featureName
    // we group them by annotation type
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        if (fp.length != 2)
            continue;
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        }
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        if (f != null)
            features.add(f);
    }

    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);

    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);
        uimatypes.add(aType);
    }
}
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
public void open() throws Exception {
    this.conf = new JobConf();
    this.reporter = Reporter.NULL;

    // Some OutputFormats (like FileOutputFormat) require that the job id/task id set.
    // So let's set it for all output formats, just in case they need it too.
    JobID jobid = new JobID("sequential", jobCounter.getAndIncrement());
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 0), 0);
    conf.set("mapred.task.id", taskid.toString());

    setSequential(conf);

    // Create a task so we can use committers.
    sequentialJob = new ExposeJobContext(conf, jobid);
    sequentialTask = new ExposeTaskAttemptContext(conf, taskid);

    // Give the commiter a chance initialize.
    OutputCommitter committer = conf.getOutputCommitter();
    // FIXME: We skip job setup for now because
    committer.setupJob(sequentialJob);
    committer.setupTask(sequentialTask);

    if (oFormat instanceof JobConfigurable)
        ((JobConfigurable) oFormat).configure(conf);
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;

    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }

        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {
    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------

    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}
From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License:Apache License
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        serializer = (AbstractSerDe) Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        ReflectionUtils.setConf(serializer, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe(serializer, jobConf, tblProperties, null);
        outputClass = serializer.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    }

    List<ObjectInspector> objectInspectors = new ArrayList<>();
    for (int i = 0; i < rowTypeInfo.getArity() - partitionCols.size(); i++) {
        objectInspectors.add(HiveTableUtil
                .getObjectInspector(LegacyTypeInfoDataTypeConverter.toDataType(rowTypeInfo.getTypeAt(i))));
    }

    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(rowTypeInfo.getFieldNames()), objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity();
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(rowTypeInfo.getFieldNames()).subList(0, rowTypeInfo.getArity() - partitionCols.size()),
                objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity() - partitionCols.size();
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}