List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getTaskAttemptID()
public TaskAttemptID getTaskAttemptID();
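Before the per-project examples, a minimal sketch of where this accessor is typically reachable: every Mapper or Reducer Context implements TaskAttemptContext, so the current attempt's ID can be read directly inside a task. This sketch is not taken from the source files below; the class name is a placeholder.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class AttemptIdLoggingMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) {
        // Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // Useful for per-attempt file names, metadata tagging, or logging.
        System.out.println("Running task attempt: " + attemptId
                + " (task " + attemptId.getTaskID().getId() + ")");
    }
}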
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a TaskAttemptContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param taskAttemptContext Use TaskAttemptID from this object
 * @return TaskAttemptContext
 */
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf,
        TaskAttemptContext taskAttemptContext) {
    return makeTaskAttemptContext(conf, taskAttemptContext.getTaskAttemptID());
}
From source file:org.apache.hcatalog.mapreduce.DefaultOutputFormatContainer.java
License:Apache License
/**
 * Get the record writer for the job. Uses the storagehandler's OutputFormat
 * to get the record writer.
 *
 * @param context the information about the current task.
 * @return a RecordWriter to write the output for the job.
 * @throws IOException
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    String name = getOutputName(context.getTaskAttemptID().getTaskID().getId());
    return new DefaultRecordWriterContainer(context,
            getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name,
                    InternalUtil.createReporter(context)));
}
From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
    org.apache.hadoop.mapred.RecordWriter localWriter;
    ObjectInspector localObjectInspector;
    SerDe localSerDe;
    OutputJobInfo localJobInfo = null;

    if (dynamicPartitioningUsed) {
        // calculate which writer to use from the remaining values - this needs to be done before we delete cols
        List<String> dynamicPartValues = new ArrayList<String>();
        for (Integer colToAppend : dynamicPartCols) {
            dynamicPartValues.add(value.get(colToAppend).toString());
        }

        String dynKey = dynamicPartValues.toString();
        if (!baseDynamicWriters.containsKey(dynKey)) {
            if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                    .createTaskAttemptContext(context);
            configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
            localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);

            // setup serDe
            SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                    currTaskContext.getJobConf());
            try {
                InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
            } catch (SerDeException e) {
                throw new IOException("Failed to initialize SerDe", e);
            }

            // create base OutputFormat
            org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                    .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

            // We are skipping calling checkOutputSpecs() for each partition
            // As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition
            // See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance
            // In general this should be ok for most FileOutputFormat implementations
            // but may become an issue for cases when the method is used to perform other setup tasks

            // get Output Committer
            org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                    .getOutputCommitter();
            // create currJobContext the latest so it gets all the config changes
            org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil
                    .createJobContext(currTaskContext);
            // setupJob()
            baseOutputCommitter.setupJob(currJobContext);
            // recreate to refresh jobConf of currTask context
            currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                    currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
            // set temp location
            currTaskContext.getConfiguration().set("mapred.work.output.dir",
                    new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                            .toString());
            // setupTask()
            baseOutputCommitter.setupTask(currTaskContext);

            Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
            Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));

            org.apache.hadoop.mapred.RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                    parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                    childPath.toString(), InternalUtil.createReporter(currTaskContext));

            baseDynamicWriters.put(dynKey, baseRecordWriter);
            baseDynamicSerDe.put(dynKey, currSerDe);
            baseDynamicCommitters.put(dynKey, baseOutputCommitter);
            dynamicContexts.put(dynKey, currTaskContext);
            dynamicObjectInspectors.put(dynKey,
                    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
            dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
        }

        localJobInfo = dynamicOutputJobInfo.get(dynKey);
        localWriter = baseDynamicWriters.get(dynKey);
        localSerDe = baseDynamicSerDe.get(dynKey);
        localObjectInspector = dynamicObjectInspectors.get(dynKey);
    } else {
        localJobInfo = jobInfo;
        localWriter = getBaseRecordWriter();
        localSerDe = serDe;
        localObjectInspector = objectInspector;
    }

    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }

    // The key given by user is ignored
    try {
        localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 *
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = HCatHadoopShims.Instance.get()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
From source file:org.apache.hive.hcatalog.mapreduce.DefaultOutputFormatContainer.java
License:Apache License
static synchronized String getOutputName(TaskAttemptContext context) {
    return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
            + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
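The snippet above builds the familiar part-file name from the task ID. Below is a small, hedged demo of that convention; the NumberFormat settings (five digits, no grouping) mirror Hadoop's usual part-file formatting and are an assumption here, since NUMBER_FORMAT's initialization is not shown in the excerpt.

import java.text.NumberFormat;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class OutputNameDemo {
    public static void main(String[] args) {
        // Assumed to match the NUMBER_FORMAT used by DefaultOutputFormatContainer.
        NumberFormat numberFormat = NumberFormat.getInstance();
        numberFormat.setMinimumIntegerDigits(5);
        numberFormat.setGroupingUsed(false);

        TaskAttemptID id = TaskAttemptID.forName("attempt_201501010000_0001_m_000003_0");
        // Prints "part-00003": basename + "-" + zero-padded task id.
        System.out.println("part" + "-" + numberFormat.format(id.getTaskID().getId()));
    }
}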
From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java
License:Apache License
@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
    OutputJobInfo localJobInfo = null;
    // Calculate which writer to use from the remaining values - this needs to
    // be done before we delete cols.
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
        Object partitionValue = value.get(colToAppend);
        dynamicPartValues
                .add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
    }

    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
            throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                    "Number of dynamic partitions being created "
                            + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                            + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                            + "] if needed.");
        }

        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                .createTaskAttemptContext(context);
        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());

        // Setup serDe.
        SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                currTaskContext.getJobConf());
        try {
            InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
        } catch (SerDeException e) {
            throw new IOException("Failed to initialize SerDe", e);
        }

        // create base OutputFormat
        org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

        // We are skipping calling checkOutputSpecs() for each partition
        // As it can throw a FileAlreadyExistsException when more than one
        // mapper is writing to a partition.
        // See HCATALOG-490, also to avoid contacting the namenode for each new
        // FileOutputFormat instance.
        // In general this should be ok for most FileOutputFormat implementations
        // but may become an issue for cases when the method is used to perform
        // other setup tasks.

        // Get Output Committer
        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                .getOutputCommitter();

        // Create currJobContext the latest so it gets all the config changes
        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);

        // Set up job.
        baseOutputCommitter.setupJob(currJobContext);

        // Recreate to refresh jobConf of currTask context.
        currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());

        // Set temp location.
        currTaskContext.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                        .toString());

        // Set up task.
        baseOutputCommitter.setupTask(currTaskContext);

        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
                currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));

        RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                childPath.toString(), InternalUtil.createReporter(currTaskContext));

        baseDynamicWriters.put(dynKey, baseRecordWriter);
        baseDynamicSerDe.put(dynKey, currSerDe);
        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
        dynamicContexts.put(dynKey, currTaskContext);
        dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
        dynamicOutputJobInfo.put(dynKey,
                HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
    }

    return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey),
            baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatMapRedUtils.java
License:Apache License
/**
 * Creates a {@code TaskAttemptID} from the provided TaskAttemptContext. This
 * also performs logic to strip the crunch named output from the TaskAttemptID
 * already associated with the TaskAttemptContext. The TaskAttemptID requires
 * there to be six parts, separated by "_". With the named output the JobID
 * has 7 parts. That needs to be stripped away before a new TaskAttemptID can
 * be constructed.
 *
 * @param context The TaskAttemptContext
 * @return A TaskAttemptID with the crunch named output removed
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext context) {
    String taskAttemptId = context.getTaskAttemptID().toString();
    List<String> taskAttemptIDParts = Lists.newArrayList(taskAttemptId.split("_"));
    if (taskAttemptIDParts.size() < 7) {
        return TaskAttemptID.forName(taskAttemptId);
    }

    // index 2 is the 3rd element in the task attempt id, which will be the named output
    taskAttemptIDParts.remove(2);
    String reducedTaskAttemptId = StringUtils.join(taskAttemptIDParts, "_");
    return TaskAttemptID.forName(reducedTaskAttemptId);
}
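To make the stripping logic concrete, here is a hedged, self-contained demo using a hypothetical Crunch-style attempt ID (the "out1" segment is invented, not taken from any real job); it reproduces the remove-index-2 step from the method above with plain JDK collections.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class StripNamedOutputDemo {
    public static void main(String[] args) {
        // Hypothetical seven-part attempt ID; "out1" stands in for a Crunch named output.
        String withNamedOutput = "attempt_201501010000_out1_0001_m_000000_0";
        List<String> parts = new ArrayList<>(Arrays.asList(withNamedOutput.split("_")));
        parts.remove(2); // drop the named-output segment, leaving the six parts forName() expects
        TaskAttemptID restored = TaskAttemptID.forName(String.join("_", parts));
        System.out.println(restored); // attempt_201501010000_0001_m_000000_0
    }
}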
From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
 * and the actual TaskAttemptContext
 *
 * @param alias the name given to the OutputFormat configuration
 * @param context the Mapper or Reducer Context
 * @return a copy of the TaskAttemptContext with the alias configuration populated
 */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
            .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
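A hedged usage sketch for the helper above: inside a task, fetch the alias-specific context and note that it keeps the same TaskAttemptID while its Configuration carries the settings registered under that alias. The alias name "textOutput" is a placeholder and has to match an alias configured on the job.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;

public class AliasContextMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // "textOutput" is a placeholder alias name.
        TaskAttemptContext aliasContext = MultiOutputFormat.getTaskAttemptContext("textOutput", context);
        // The copy keeps the same attempt identity as the real context...
        System.out.println("alias attempt: " + aliasContext.getTaskAttemptID());
        // ...while its Configuration has the alias's OutputFormat settings merged in.
    }
}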
From source file:org.apache.hive.hcatalog.mapreduce.TaskCommitContextRegistry.java
License:Apache License
private String generateKey(TaskAttemptContext context) throws IOException {
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE.
        throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
    }

    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}
From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java
License:Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);

    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);

    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
                justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}