List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID
public TaskAttemptID getTaskAttemptID();
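Before the project examples below, here is a minimal, self-contained sketch of the most common pattern: reading the TaskAttemptID in a Mapper's setup() (Mapper.Context extends TaskAttemptContext) to derive a name that is unique per task attempt. This sketch is not taken from any of the listed projects; the class name and the key format are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical mapper: derives a name that is unique per task attempt, so
// speculative or retried attempts of the same task never collide.
public class TaskUniqueNameMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String taskUniqueName;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // e.g. "attempt_1700000000000_0001_m_000000_0" -> "part-000000-0"
        taskUniqueName = String.format("part-%06d-%d",
                attemptId.getTaskID().getId(), attemptId.getId());
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(taskUniqueName), new LongWritable(value.getLength()));
    }
}

Because speculative execution can run two attempts of the same task concurrently, keying names off the attempt ID rather than the task ID alone keeps their outputs distinct, as several of the examples below also do.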
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
// upstream: static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter()
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);
            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            // create a new HFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            // we now have the proper HFile writer: full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            // Copy the row so we know when a row transition occurs.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         *
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType)
                    .withComparator(KeyValue.COMPARATOR).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Create a file output committer
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                        + context.getTaskAttemptID().toString())).makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output"
                            + " directory of task: " + attemptId + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        // commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file:com.scaleoutsoftware.soss.hserver.CachingRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (ImageInputSplit) split;
    this.context = context;
    LOG.info("Recording split: " + split);
    fallBackRecordReader.initialize(((ImageInputSplit) split).getFallbackInputSplit(), context);
    try {
        bucketStore = BucketStoreFactory.getBucketStore(this.split.getImageIdString());
        BucketId = ((ImageInputSplit) split).getBucketId();
        sossAvailable = true;
        if (BucketId.isDummyId()) { // This split has never been recorded
            BucketId = bucketStore.getNextLocalBucketId();
        }
        LOG.debug("Updating image before recording. " + this.split.getImageIdString());
        updateImageBeforeRecording(context.getTaskAttemptID().getTaskID().toString());
    } catch (StateServerException e) {
        LOG.error("Cannot connect to ScaleOut StateServer.", e);
        sossAvailable = false;
    }
    numberOfKeys = 0;
}
From source file:com.splicemachine.derby.stream.spark.fake.FakeOutputCommitter.java
License:Apache License
@Override
public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
    String abortDirectory = taskAttemptContext.getConfiguration().get("abort.directory");
    File file = new File(abortDirectory, taskAttemptContext.getTaskAttemptID().getTaskID().toString());
    file.createNewFile();
}
From source file:com.splicemachine.stream.index.HTableOutputFormat.java
License:Apache License
@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.output.SMOutputFormat.java
License:Apache License
@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(childTxn);
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.output.SpliceOutputCommitter.java
License:Apache License
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "setupTask");
    // Create child additive transaction so we don't read rows inserted by ourselves in this operation
    TxnView txn = SIDriver.driver().lifecycleManager().beginChildTransaction(parentTxn,
            parentTxn.getIsolationLevel(), true, destinationTable);
    ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true,
            parentTxn.getIsolationLevel());
    taskAttemptMap.put(taskContext.getTaskAttemptID(), childTxn);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "beginTxn=%s and destinationTable=%s", childTxn, destinationTable);
}