Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.TaskAttemptContext method getTaskAttemptID().

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
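
Before the collected usages, here is a minimal sketch of reading the attempt ID from inside a job. The mapper class, types, and logging are illustrative assumptions, not taken from any source file listed below; the point is only that Mapper.Context extends TaskAttemptContext, so getTaskAttemptID() is available directly.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class AttemptLoggingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context is a TaskAttemptContext, so the attempt ID can be read directly.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        System.out.println("Running attempt " + attemptId + " of task " + attemptId.getTaskID());
    }
}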

Usage

From source file:org.apache.giraph.job.HadoopUtils.java

License:Apache License

/**
 * Create a TaskAttemptContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param taskAttemptContext Use TaskAttemptID from this object
 * @return TaskAttemptContext
 */
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf,
        TaskAttemptContext taskAttemptContext) {
    return makeTaskAttemptContext(conf, taskAttemptContext.getTaskAttemptID());
}
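
The overload taking a TaskAttemptID is not shown in this snippet. As a rough, Hadoop-2-only sketch (an assumption for illustration; Giraph's real helper abstracts over several Hadoop releases), it could simply wrap TaskAttemptContextImpl:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical Hadoop-2-only counterpart of the overload called above.
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf, TaskAttemptID taskAttemptID) {
    return new TaskAttemptContextImpl(conf, taskAttemptID);
}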

From source file:org.apache.hcatalog.mapreduce.DefaultOutputFormatContainer.java

License:Apache License

/**
 * Get the record writer for the job. Uses the storage handler's OutputFormat
 * to get the record writer.
 * @param context the information about the current task.
 * @return a RecordWriter to write the output for the job.
 * @throws IOException
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    String name = getOutputName(context.getTaskAttemptID().getTaskID().getId());
    return new DefaultRecordWriterContainer(context, getBaseOutputFormat().getRecordWriter(null,
            new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context)));
}
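
getOutputName(int) is defined elsewhere in this container and is not shown above. A minimal sketch of what it is assumed to look like, following the standard Hadoop part-NNNNN naming, is given below; the zero-padded number is the task ID obtained through context.getTaskAttemptID().getTaskID().getId().

import java.text.NumberFormat;

// Assumed helpers, not part of the snippet above: zero-pad the numeric task ID
// (from getTaskAttemptID().getTaskID().getId()) into the usual "part-NNNNN" file name.
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
static {
    NUMBER_FORMAT.setMinimumIntegerDigits(5);
    NUMBER_FORMAT.setGroupingUsed(false);
}

static synchronized String getOutputName(int partition) {
    return "part-" + NUMBER_FORMAT.format(partition);
}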

From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java

License:Apache License

@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {

    org.apache.hadoop.mapred.RecordWriter localWriter;
    ObjectInspector localObjectInspector;
    SerDe localSerDe;
    OutputJobInfo localJobInfo = null;

    if (dynamicPartitioningUsed) {
        // calculate which writer to use from the remaining values - this needs to be done before we delete cols
        List<String> dynamicPartValues = new ArrayList<String>();
        for (Integer colToAppend : dynamicPartCols) {
            dynamicPartValues.add(value.get(colToAppend).toString());
        }

        String dynKey = dynamicPartValues.toString();
        if (!baseDynamicWriters.containsKey(dynKey)) {
            if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                    .createTaskAttemptContext(context);
            configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
            localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);

            //setup serDe
            SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                    currTaskContext.getJobConf());
            try {
                InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
            } catch (SerDeException e) {
                throw new IOException("Failed to initialize SerDe", e);
            }

            //create base OutputFormat
            org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                    .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

            //We are skipping calling checkOutputSpecs() for each partition
            //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition
            //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance
            //In general this should be ok for most FileOutputFormat implementations
            //but may become an issue for cases when the method is used to perform other setup tasks

            //get Output Committer
            org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                    .getOutputCommitter();
            //create currJobContext the latest so it gets all the config changes
            org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil
                    .createJobContext(currTaskContext);
            //setupJob()
            baseOutputCommitter.setupJob(currJobContext);
            //recreate to refresh jobConf of currTask context
            currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                    currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
            //set temp location
            currTaskContext.getConfiguration().set("mapred.work.output.dir",
                    new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                            .toString());
            //setupTask()
            baseOutputCommitter.setupTask(currTaskContext);

            Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
            Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));

            org.apache.hadoop.mapred.RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                    parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                    childPath.toString(), InternalUtil.createReporter(currTaskContext));

            baseDynamicWriters.put(dynKey, baseRecordWriter);
            baseDynamicSerDe.put(dynKey, currSerDe);
            baseDynamicCommitters.put(dynKey, baseOutputCommitter);
            dynamicContexts.put(dynKey, currTaskContext);
            dynamicObjectInspectors.put(dynKey,
                    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
            dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
        }

        localJobInfo = dynamicOutputJobInfo.get(dynKey);
        localWriter = baseDynamicWriters.get(dynKey);
        localSerDe = baseDynamicSerDe.get(dynKey);
        localObjectInspector = dynamicObjectInspectors.get(dynKey);
    } else {
        localJobInfo = jobInfo;
        localWriter = getBaseRecordWriter();
        localSerDe = serDe;
        localObjectInspector = objectInspector;
    }

    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }

    //The key given by user is ignored
    try {
        localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}

From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java

License:Apache License

/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = HCatHadoopShims.Instance.get()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
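
A hedged usage sketch: once the alias-specific context is obtained, it can be handed to whatever OutputFormat was registered under that alias. The alias name, the TextOutputFormat choice, and the key/value types below are illustrative assumptions, not taken from MultiOutputFormat itself.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Illustrative only: the per-alias context carries the alias configuration plus the
// original task attempt ID, so the wrapped format writes under the same attempt.
public static RecordWriter<Text, LongWritable> openAliasWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    TaskAttemptContext textContext = MultiOutputFormat.getTaskAttemptContext("textOutput", context);
    TextOutputFormat<Text, LongWritable> textFormat = new TextOutputFormat<Text, LongWritable>();
    return textFormat.getRecordWriter(textContext);
}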

From source file:org.apache.hive.hcatalog.mapreduce.DefaultOutputFormatContainer.java

License:Apache License

static synchronized String getOutputName(TaskAttemptContext context) {
    return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
            + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
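
For example, with the default basename this yields names such as part-00003 for the attempt of task 3; NUMBER_FORMAT is assumed here to be the usual zero-padding NumberFormat (minimum of five integer digits, grouping disabled) defined elsewhere in the container, as in the sketch shown earlier for the org.apache.hcatalog variant.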

From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java

License:Apache License

@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
    OutputJobInfo localJobInfo = null;
    // Calculate which writer to use from the remaining values - this needs to
    // be done before we delete cols.
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
        Object partitionValue = value.get(colToAppend);
        dynamicPartValues
                .add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
    }

    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
            throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                    "Number of dynamic partitions being created "
                            + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                            + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                            + "] if needed.");
        }

        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                .createTaskAttemptContext(context);
        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());

        // Setup serDe.
        SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                currTaskContext.getJobConf());
        try {
            InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
        } catch (SerDeException e) {
            throw new IOException("Failed to initialize SerDe", e);
        }

        // create base OutputFormat
        org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

        // We are skipping calling checkOutputSpecs() for each partition
        // As it can throw a FileAlreadyExistsException when more than one
        // mapper is writing to a partition.
        // See HCATALOG-490, also to avoid contacting the namenode for each new
        // FileOutputFormat instance.
        // In general this should be ok for most FileOutputFormat implementations
        // but may become an issue for cases when the method is used to perform
        // other setup tasks.

        // Get Output Committer
        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                .getOutputCommitter();

        // Create currJobContext the latest so it gets all the config changes
        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);

        // Set up job.
        baseOutputCommitter.setupJob(currJobContext);

        // Recreate to refresh jobConf of currTask context.
        currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());

        // Set temp location.
        currTaskContext.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                        .toString());

        // Set up task.
        baseOutputCommitter.setupTask(currTaskContext);

        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
                currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));

        RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                childPath.toString(), InternalUtil.createReporter(currTaskContext));

        baseDynamicWriters.put(dynKey, baseRecordWriter);
        baseDynamicSerDe.put(dynKey, currSerDe);
        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
        dynamicContexts.put(dynKey, currTaskContext);
        dynamicObjectInspectors.put(dynKey,
                InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
        dynamicOutputJobInfo.put(dynKey,
                HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
    }

    return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey),
            baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}

From source file:org.apache.hive.hcatalog.mapreduce.HCatMapRedUtils.java

License:Apache License

/**
 * Creates a {@code TaskAttemptID} from the provided TaskAttemptContext. This
 * also performs logic to strip the crunch named output from the TaskAttemptID
 * already associated with the TaskAttemptContext. The TaskAttemptID requires
 * there to be six parts, separated by "_". With the named output the JobID
 * has 7 parts. That needs to be stripped away before a new TaskAttemptID can
 * be constructed.
 *
 * @param context
 *          The TaskAttemptContext
 * @return A TaskAttemptID with the crunch named output removed
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext context) {
    String taskAttemptId = context.getTaskAttemptID().toString();
    List<String> taskAttemptIDParts = Lists.newArrayList(taskAttemptId.split("_"));
    if (taskAttemptIDParts.size() < 7)
        return TaskAttemptID.forName(taskAttemptId);

    // index 2 is the 3rd element in the task attempt id, which will be the
    // named output
    taskAttemptIDParts.remove(2);
    String reducedTaskAttemptId = StringUtils.join(taskAttemptIDParts, "_");
    return TaskAttemptID.forName(reducedTaskAttemptId);
}
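
A worked example of the stripping, for illustration only; the exact layout of a crunch named-output attempt ID is an assumption here, chosen to be consistent with the index-2 removal above.

// Assumed 7-part ID: the crunch named output ("out0") sits at index 2 and is removed,
//   "attempt_1408760264423_out0_0001_m_000000_0"
//   -> "attempt_1408760264423_0001_m_000000_0", which TaskAttemptID.forName can parse.
TaskAttemptID stripped = HCatMapRedUtils.getTaskAttemptID(context);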

From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java

License:Apache License

/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}

From source file:org.apache.hive.hcatalog.mapreduce.TaskCommitContextRegistry.java

License:Apache License

private String generateKey(TaskAttemptContext context) throws IOException {
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE.
        throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
    }
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}
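
The resulting key is simply the attempt ID followed by "@" and the job's output location, so it looks roughly like attempt_1408760264423_0001_m_000000_0@hdfs://namenode/warehouse/mytable; the concrete ID and path here are illustrative, not taken from the source.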

From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java

License:Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
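
A hedged sketch of how this static helper is typically exposed through the format's public API; the override below follows the usual FileOutputFormat wiring and is an assumption, not code taken from HFileOutputFormat3 itself.

@Override
public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // The committer supplies the task-attempt work path that createRecordWriter()
    // uses as the temporary output directory for the HFiles.
    return createRecordWriter(context, getOutputCommitter(context));
}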