List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getTaskAttemptID()
public TaskAttemptID getTaskAttemptID();
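Before the per-project examples, a minimal sketch of where this accessor is typically reachable: every Mapper or Reducer Context implements TaskAttemptContext, so the current attempt's ID can be read directly inside a task. This sketch is not taken from the source files below; the class name is a placeholder.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class AttemptIdLoggingMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) {
        // Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // Useful for per-attempt file names, metadata tagging, or logging.
        System.out.println("Running task attempt: " + attemptId
                + " (task " + attemptId.getTaskID().getId() + ")");
    }
}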
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a TaskAttemptContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param taskAttemptContext Use TaskAttemptID from this object
 * @return TaskAttemptContext
 */
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf,
        TaskAttemptContext taskAttemptContext) {
    return makeTaskAttemptContext(conf, taskAttemptContext.getTaskAttemptID());
}
From source file:org.apache.hcatalog.mapreduce.DefaultOutputFormatContainer.java
License:Apache License
/**
 * Get the record writer for the job. Uses the storagehandler's OutputFormat
 * to get the record writer.
 *
 * @param context the information about the current task.
 * @return a RecordWriter to write the output for the job.
 * @throws IOException
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    String name = getOutputName(context.getTaskAttemptID().getTaskID().getId());
    return new DefaultRecordWriterContainer(context,
            getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name,
                    InternalUtil.createReporter(context)));
}
From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
    org.apache.hadoop.mapred.RecordWriter localWriter;
    ObjectInspector localObjectInspector;
    SerDe localSerDe;
    OutputJobInfo localJobInfo = null;

    if (dynamicPartitioningUsed) {
        // calculate which writer to use from the remaining values - this needs to be done before we delete cols
        List<String> dynamicPartValues = new ArrayList<String>();
        for (Integer colToAppend : dynamicPartCols) {
            dynamicPartValues.add(value.get(colToAppend).toString());
        }

        String dynKey = dynamicPartValues.toString();
        if (!baseDynamicWriters.containsKey(dynKey)) {
            if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                    .createTaskAttemptContext(context);
            configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
            localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);

            // setup serDe
            SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                    currTaskContext.getJobConf());
            try {
                InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
            } catch (SerDeException e) {
                throw new IOException("Failed to initialize SerDe", e);
            }

            // create base OutputFormat
            org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                    .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

            // We are skipping calling checkOutputSpecs() for each partition
            // As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition
            // See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance
            // In general this should be ok for most FileOutputFormat implementations
            // but may become an issue for cases when the method is used to perform other setup tasks

            // get Output Committer
            org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                    .getOutputCommitter();
            // create currJobContext the latest so it gets all the config changes
            org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil
                    .createJobContext(currTaskContext);
            // setupJob()
            baseOutputCommitter.setupJob(currJobContext);
            // recreate to refresh jobConf of currTask context
            currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                    currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
            // set temp location
            currTaskContext.getConfiguration().set("mapred.work.output.dir",
                    new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                            .toString());
            // setupTask()
            baseOutputCommitter.setupTask(currTaskContext);

            Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
            Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));

            org.apache.hadoop.mapred.RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                    parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                    childPath.toString(), InternalUtil.createReporter(currTaskContext));

            baseDynamicWriters.put(dynKey, baseRecordWriter);
            baseDynamicSerDe.put(dynKey, currSerDe);
            baseDynamicCommitters.put(dynKey, baseOutputCommitter);
            dynamicContexts.put(dynKey, currTaskContext);
            dynamicObjectInspectors.put(dynKey,
                    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
            dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
        }

        localJobInfo = dynamicOutputJobInfo.get(dynKey);
        localWriter = baseDynamicWriters.get(dynKey);
        localSerDe = baseDynamicSerDe.get(dynKey);
        localObjectInspector = dynamicObjectInspectors.get(dynKey);
    } else {
        localJobInfo = jobInfo;
        localWriter = getBaseRecordWriter();
        localSerDe = serDe;
        localObjectInspector = objectInspector;
    }

    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }

    // The key given by user is ignored
    try {
        localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 *
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = HCatHadoopShims.Instance.get()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
From source file:org.apache.hive.hcatalog.mapreduce.DefaultOutputFormatContainer.java
License:Apache License
static synchronized String getOutputName(TaskAttemptContext context) {
    return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
            + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
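The snippet above builds the familiar part-file name from the task ID. Below is a small, hedged demo of that convention; the NumberFormat settings (five digits, no grouping) mirror Hadoop's usual part-file formatting and are an assumption here, since NUMBER_FORMAT's initialization is not shown in the excerpt.

import java.text.NumberFormat;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class OutputNameDemo {
    public static void main(String[] args) {
        // Assumed to match the NUMBER_FORMAT used by DefaultOutputFormatContainer.
        NumberFormat numberFormat = NumberFormat.getInstance();
        numberFormat.setMinimumIntegerDigits(5);
        numberFormat.setGroupingUsed(false);

        TaskAttemptID id = TaskAttemptID.forName("attempt_201501010000_0001_m_000003_0");
        // Prints "part-00003": basename + "-" + zero-padded task id.
        System.out.println("part" + "-" + numberFormat.format(id.getTaskID().getId()));
    }
}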
From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java
License:Apache License
@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
    OutputJobInfo localJobInfo = null;
    // Calculate which writer to use from the remaining values - this needs to
    // be done before we delete cols.
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
        Object partitionValue = value.get(colToAppend);
        dynamicPartValues
                .add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
    }

    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
            throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                    "Number of dynamic partitions being created "
                            + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                            + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                            + "] if needed.");
        }

        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                .createTaskAttemptContext(context);
        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());

        // Setup serDe.
        SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                currTaskContext.getJobConf());
        try {
            InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
        } catch (SerDeException e) {
            throw new IOException("Failed to initialize SerDe", e);
        }

        // create base OutputFormat
        org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

        // We are skipping calling checkOutputSpecs() for each partition
        // As it can throw a FileAlreadyExistsException when more than one
        // mapper is writing to a partition.
        // See HCATALOG-490, also to avoid contacting the namenode for each new
        // FileOutputFormat instance.
        // In general this should be ok for most FileOutputFormat implementations
        // but may become an issue for cases when the method is used to perform
        // other setup tasks.

        // Get Output Committer
        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                .getOutputCommitter();

        // Create currJobContext the latest so it gets all the config changes
        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);

        // Set up job.
        baseOutputCommitter.setupJob(currJobContext);

        // Recreate to refresh jobConf of currTask context.
        currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());

        // Set temp location.
        currTaskContext.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                        .toString());

        // Set up task.
        baseOutputCommitter.setupTask(currTaskContext);

        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
                currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));

        RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                childPath.toString(), InternalUtil.createReporter(currTaskContext));

        baseDynamicWriters.put(dynKey, baseRecordWriter);
        baseDynamicSerDe.put(dynKey, currSerDe);
        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
        dynamicContexts.put(dynKey, currTaskContext);
        dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
        dynamicOutputJobInfo.put(dynKey,
                HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
    }

    return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey),
            baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatMapRedUtils.java
License:Apache License
/**
 * Creates a {@code TaskAttemptID} from the provided TaskAttemptContext. This
 * also performs logic to strip the crunch named output from the TaskAttemptID
 * already associated with the TaskAttemptContext. The TaskAttemptID requires
 * there to be six parts, separated by "_". With the named output the JobID
 * has 7 parts. That needs to be stripped away before a new TaskAttemptID can
 * be constructed.
 *
 * @param context The TaskAttemptContext
 * @return A TaskAttemptID with the crunch named output removed
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext context) {
    String taskAttemptId = context.getTaskAttemptID().toString();
    List<String> taskAttemptIDParts = Lists.newArrayList(taskAttemptId.split("_"));
    if (taskAttemptIDParts.size() < 7) {
        return TaskAttemptID.forName(taskAttemptId);
    }

    // index 2 is the 3rd element in the task attempt id, which will be the named output
    taskAttemptIDParts.remove(2);
    String reducedTaskAttemptId = StringUtils.join(taskAttemptIDParts, "_");
    return TaskAttemptID.forName(reducedTaskAttemptId);
}
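To make the stripping logic concrete, here is a hedged, self-contained demo using a hypothetical Crunch-style attempt ID (the "out1" segment is invented, not taken from any real job); it reproduces the remove-index-2 step from the method above with plain JDK collections.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class StripNamedOutputDemo {
    public static void main(String[] args) {
        // Hypothetical seven-part attempt ID; "out1" stands in for a Crunch named output.
        String withNamedOutput = "attempt_201501010000_out1_0001_m_000000_0";
        List<String> parts = new ArrayList<>(Arrays.asList(withNamedOutput.split("_")));
        parts.remove(2); // drop the named-output segment, leaving the six parts forName() expects
        TaskAttemptID restored = TaskAttemptID.forName(String.join("_", parts));
        System.out.println(restored); // attempt_201501010000_0001_m_000000_0
    }
}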
From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 *
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
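A hedged usage sketch for the helper above: inside a task, fetch the alias-specific context and note that it keeps the same TaskAttemptID while its Configuration carries the settings registered under that alias. The alias name "textOutput" is a placeholder and has to match an alias configured on the job.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;

public class AliasContextMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // "textOutput" is a placeholder alias name.
        TaskAttemptContext aliasContext = MultiOutputFormat.getTaskAttemptContext("textOutput", context);
        // The copy keeps the same attempt identity as the real context...
        System.out.println("alias attempt: " + aliasContext.getTaskAttemptID());
        // ...while its Configuration has the alias's OutputFormat settings merged in.
    }
}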
From source file:org.apache.hive.hcatalog.mapreduce.TaskCommitContextRegistry.java
License:Apache License
private String generateKey(TaskAttemptContext context) throws IOException {
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE.
        throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
    }

    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}
From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java
License:Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);

    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);

    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
                justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}