Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getTaskAttemptID()
/**
 * Returns the task attempt ID carried by this context.
 *
 * @return the {@link TaskAttemptID} for this task attempt
 */
public TaskAttemptID getTaskAttemptID();
From source file:gobblin.runtime.mapreduce.GobblinOutputCommitter.java
License:Apache License
/**
 * Reports whether the given task attempt needs a commit.
 *
 * <p>A commit is needed exactly when the attempt's ID, in string form, is a key of
 * {@code attemptIdToMultiTaskAttempt}.
 *
 * @param arg0 the context of the task attempt being queried
 * @return {@code true} if the attempt ID is registered in {@code attemptIdToMultiTaskAttempt}
 * @throws IOException declared by the overridden method; not thrown by this body directly
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
  final String attemptKey = arg0.getTaskAttemptID().toString();
  return this.attemptIdToMultiTaskAttempt.containsKey(attemptKey);
}
From source file:gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { return new RecordWriter<ImmutableBytesWritable, KeyValue>() { @Override/* www . j av a2s .c om*/ public void close(TaskAttemptContext context) throws IOException, InterruptedException { for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) { writer.close(context); } } @Override public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException { RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key); if (writer == null) { final Path outputPath = new Path( FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get())); writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() { final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); final Path outputdir = committer.getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE); final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE); // Invented config. Add to hbase-*.xml if other than default compression. final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); // Map of families to writers and how much has been output on the writer. 
final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; final byte[] now = Bytes.toBytes(System.currentTimeMillis()); boolean rollRequested = false; public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException { // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. 
this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir, Configuration conf) throws IOException { if (writer != null) { close(writer); } return HFile.getWriterFactoryNoCache(conf).create(); //return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); } private WriterLength getNewWriter(byte[] family) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); wl.writer = getNewWriter(wl.writer, familydir, conf); this.writers.put(family, wl); return wl; } private void close(final HFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { for (WriterLength wl : this.writers.values()) { close(wl.writer); } committer.commitTask(c); } }; writers.put(key, writer); } writer.write(new ImmutableBytesWritable(value.getRow()), value); } }; }
From source file:input_format.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { return new RecordWriter<ImmutableBytesWritable, KeyValue>() { @Override//from w w w . j ava 2s . co m public void close(TaskAttemptContext context) throws IOException, InterruptedException { for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) { writer.close(context); } } @Override public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException { RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key); if (writer == null) { final Path outputPath = new Path( FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get())); writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() { final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); final Path outputdir = committer.getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE); final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE); // Invented config. Add to hbase-*.xml if other than default compression. final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); // Map of families to writers and how much has been output on the writer. 
final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; final byte[] now = Bytes.toBytes(System.currentTimeMillis()); boolean rollRequested = false; public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException { // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. 
this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir, Configuration conf) throws IOException { if (writer != null) { close(writer); } return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression, KeyValue.KEY_COMPARATOR); // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); } private WriterLength getNewWriter(byte[] family) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); wl.writer = getNewWriter(wl.writer, familydir, conf); this.writers.put(family, wl); return wl; } private void close(final HFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { for (WriterLength wl : this.writers.values()) { close(wl.writer); } committer.commitTask(c); } }; writers.put(key, writer); } writer.write(new ImmutableBytesWritable(value.getRow()), value); } }; }
From source file:kafka.bridge.hadoop.KafkaOutputFormat.java
License:Apache License
@Override public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { // Is there a programmatic way to get the temp dir? I see it hardcoded everywhere in Hadoop, Hive, and Pig. return new FileOutputCommitter( new Path("/tmp/" + taskAttemptContext.getTaskAttemptID().getJobID().toString()), taskAttemptContext);// w ww. j av a 2 s . co m }
From source file:org.apache.avro.mapreduce.TestAvroKeyOutputFormat.java
License:Apache License
/**
 * Verifies that the output format builds its record writer through the injected factory, using
 * the expected codec, and returns exactly the writer the factory produced.
 *
 * @param conf the configuration the job is created from
 * @param expectedCodec the codec factory the output format is expected to pass to the factory
 * @throws IOException if job creation or record writer creation fails
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
  // Prepare the job: output directory and an INT key schema.
  Job avroJob = new Job(conf);
  avroJob.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
  Schema keySchema = Schema.create(Schema.Type.INT);
  AvroJob.setOutputKeySchema(avroJob, keySchema);

  // Mock the task attempt context on top of the job configuration.
  TaskAttemptContext attemptContext = createMock(TaskAttemptContext.class);
  expect(attemptContext.getConfiguration()).andReturn(avroJob.getConfiguration()).anyTimes();
  expect(attemptContext.getTaskAttemptID())
      .andReturn(TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0"))
      .anyTimes();
  expect(attemptContext.getNumReduceTasks()).andReturn(1);

  // Mock the record writer and the factory that is supposed to hand it out.
  @SuppressWarnings("unchecked")
  RecordWriter<AvroKey<Integer>, NullWritable> mockWriter = createMock(RecordWriter.class);
  AvroKeyOutputFormat.RecordWriterFactory mockFactory = createMock(
      AvroKeyOutputFormat.RecordWriterFactory.class);

  // The factory must be called with the key schema; the codec is captured for comparison later.
  Capture<CodecFactory> codecCapture = new Capture<CodecFactory>();
  expect(mockFactory.create(
      eq(keySchema),
      anyObject(GenericData.class),
      capture(codecCapture),
      anyObject(OutputStream.class)))
      .andReturn(mockWriter);

  replay(attemptContext);
  replay(mockWriter);
  replay(mockFactory);

  AvroKeyOutputFormat<Integer> format = new AvroKeyOutputFormat<Integer>(mockFactory);
  RecordWriter<AvroKey<Integer>, NullWritable> actualWriter = format.getRecordWriter(attemptContext);

  // Make sure the expected codec was used.
  assertTrue(codecCapture.hasCaptured());
  assertEquals(expectedCodec.toString(), codecCapture.getValue().toString());

  verify(attemptContext);
  verify(mockWriter);
  verify(mockFactory);

  assertNotNull(actualWriter);
  assertTrue(mockWriter == actualWriter);
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
/**
 * Decides whether the given task attempt needs a commit.
 *
 * <p>The only attempts that do not need one are map attempts of a job that has at least one
 * reduce task; every other attempt does.
 *
 * @param context the context of the task attempt being queried
 * @return {@code false} for a map attempt when the reduce task count is non-zero, else {@code true}
 * @throws IOException declared by the overridden method; not thrown by this body directly
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
  final int reducerCount = context.getNumReduceTasks();
  final TaskAttemptID attempt = context.getTaskAttemptID();
  final boolean mapAttemptWithReducers = attempt.isMap() && reducerCount != 0;
  return !mapAttemptWithReducers;
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
private Conf setup(TaskAttemptContext context) throws IOException { LOG.info("Setting up committer with task attempt [{0}]", context.getTaskAttemptID().toString()); Conf conf = new Conf(); conf._configuration = context.getConfiguration(); conf._tableDescriptor = BlurOutputFormat.getTableDescriptor(conf._configuration); int shardCount = conf._tableDescriptor.getShardCount(); int attemptId = context.getTaskAttemptID().getTaskID().getId(); int shardId = attemptId % shardCount; conf._taskAttemptID = context.getTaskAttemptID(); Path tableOutput = BlurOutputFormat.getOutputPath(conf._configuration); String shardName = ShardUtil.getShardName(BlurConstants.SHARD_PREFIX, shardId); conf._indexPath = new Path(tableOutput, shardName); conf._newIndex = new Path(conf._indexPath, conf._taskAttemptID.toString() + ".tmp"); return conf;//from w ww. jav a2 s . co m }
From source file:org.apache.blur.mapreduce.lib.BlurOutputFormat.java
License:Apache License
@Override public RecordWriter<Text, BlurMutate> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { int id = context.getTaskAttemptID().getTaskID().getId(); TaskAttemptID taskAttemptID = context.getTaskAttemptID(); final GenericBlurRecordWriter writer = new GenericBlurRecordWriter(context.getConfiguration(), id, taskAttemptID.toString() + ".tmp"); return new RecordWriter<Text, BlurMutate>() { @Override// w w w. ja v a 2 s . c o m public void write(Text key, BlurMutate value) throws IOException, InterruptedException { writer.write(key, value); } @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { writer.close(); } }; }
From source file:org.apache.crunch.impl.mr.run.CrunchRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { this.crunchSplit = (CrunchInputSplit) inputSplit; this.context = context; Configuration conf = crunchSplit.getConf(); if (conf == null) { conf = context.getConfiguration(); crunchSplit.setConf(conf);//www . java 2 s.c o m } if (crunchSplit.getInputSplit() instanceof CombineFileSplit) { combineFileSplit = (CombineFileSplit) crunchSplit.getInputSplit(); } if (curReader != null) { curReader.initialize(getDelegateSplit(), TaskAttemptContextFactory.create(conf, context.getTaskAttemptID())); } }
From source file:org.apache.crunch.io.hbase.HFileOutputFormatForCrunch.java
License:Apache License
/**
 * Creates a record writer that appends {@link KeyValue}s to a single HFile at the task's default
 * work file.
 *
 * <p>The HFile's block size, compression and data block encoding come from an
 * {@code HColumnDescriptor} that is read from the configuration as a hex string under
 * {@code HCOLUMN_DESCRIPTOR_KEY}. On close, the writer records bulk-load metadata (time, task
 * attempt ID, major-compaction flag, minor-compaction exclusion flag and time range) in the
 * file. The fields {@code now} and {@code trt} used below are declared outside this method.
 *
 * @param context the context of the task attempt; supplies the configuration, the work file and
 *     the task attempt ID written into the HFile
 * @return a record writer backed by one HFile writer
 * @throws IOException if the file system, descriptor or HFile writer cannot be set up
 * @throws InterruptedException declared by the overridden method
 * @throws AssertionError if the column descriptor is missing from the configuration or is not
 *     valid hex; in the latter case the decoding failure is attached as the cause
 */
@Override
public RecordWriter<Object, KeyValue> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  Path outputPath = getDefaultWorkFile(context, "");
  Configuration conf = context.getConfiguration();
  FileSystem fs = outputPath.getFileSystem(conf);
  final boolean compactionExclude = conf.getBoolean(COMPACTION_EXCLUDE_CONF_KEY, false);

  String hcolStr = conf.get(HCOLUMN_DESCRIPTOR_KEY);
  if (hcolStr == null) {
    throw new AssertionError(HCOLUMN_DESCRIPTOR_KEY + " is not set in conf");
  }
  byte[] hcolBytes;
  try {
    hcolBytes = Hex.decodeHex(hcolStr.toCharArray());
  } catch (DecoderException e) {
    // Keep the decoding failure as the cause instead of discarding it; initCause is used
    // because the (String, Throwable) constructor is not available on every Java version.
    AssertionError failure = new AssertionError("Bad hex string: " + hcolStr);
    failure.initCause(e);
    throw failure;
  }

  // Deserialize the column descriptor that drives the HFile settings.
  HColumnDescriptor hcol = new HColumnDescriptor();
  hcol.readFields(new DataInputStream(new ByteArrayInputStream(hcolBytes)));
  LOG.info("Output path: " + outputPath);
  LOG.info("HColumnDescriptor: " + hcol.toString());

  final HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
      .withPath(fs, outputPath)
      .withBlockSize(hcol.getBlocksize())
      .withCompression(hcol.getCompression())
      .withComparator(KeyValue.KEY_COMPARATOR)
      .withDataBlockEncoder(new HFileDataBlockEncoderImpl(hcol.getDataBlockEncoding()))
      .withChecksumType(Store.getChecksumType(conf))
      .withBytesPerChecksum(Store.getBytesPerChecksum(conf))
      .create();

  return new RecordWriter<Object, KeyValue>() {

    /** Appends {@code kv}, first replacing a LATEST_TIMESTAMP with {@code now}. */
    @Override
    public void write(Object row, KeyValue kv) throws IOException {
      if (kv.getTimestamp() == HConstants.LATEST_TIMESTAMP) {
        kv.updateLatestStamp(now);
      }
      writer.append(kv);
      trt.includeTimestamp(kv);
    }

    /** Writes the bulk-load file info entries and closes the HFile. */
    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
      writer.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
          Bytes.toBytes(context.getTaskAttemptID().toString()));
      writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
      writer.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
          Bytes.toBytes(compactionExclude));
      writer.appendFileInfo(StoreFile.TIMERANGE_KEY, WritableUtils.toByteArray(trt));
      writer.close();
    }
  };
}