List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor TaskAttemptID(...)
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
From source file:com.twitter.scalding.parquet.scrooge.ParquetScroogeSchemeTest.java
License:Apache License
private void writeParquetFile(List<TBase> recordsToWrite, Configuration conf, Path parquetFile) throws IOException, InterruptedException, org.apache.thrift.TException { //create a test file final TProtocolFactory protocolFactory = new TCompactProtocol.Factory(); final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0); Class writeClass = recordsToWrite.get(0).getClass(); final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile, ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos)); for (TBase recordToWrite : recordsToWrite) { recordToWrite.write(protocol);/*from w ww .ja v a 2 s . c om*/ } w.write(new BytesWritable(baos.toByteArray())); w.close(); }
From source file:cz.seznam.euphoria.hadoop.HadoopUtils.java
License:Apache License
public static TaskAttemptContext createTaskContext(Configuration conf, int taskNumber) { // TODO uses some default hard-coded values TaskAttemptID taskAttemptID = new TaskAttemptID("0", // job tracker ID 0, // job number, TaskType.REDUCE, // task type, taskNumber, // task ID 0); // task attempt return new TaskAttemptContextImpl(conf, taskAttemptID); }
From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java
License:Apache License
@Override public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException { final HadoopHelper helper = new HadoopHelper(config); final Configuration conf = helper.getConfiguration(); final Mapper<K1, V1, K2, V2> mapper = helper.getMapper(); final InputFormat<K1, V1> inputFormat = helper.getInputFormat(); final IInputSplitProvider isp = factory.createInputSplitProvider(partition); final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0); final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId); final int framesLimit = helper.getSortFrameLimit(ctx); final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories(); class SortingRecordWriter extends RecordWriter<K2, V2> { private final ArrayTupleBuilder tb; private final ByteBuffer frame; private final FrameTupleAppender fta; private ExternalSortRunGenerator runGen; private int blockId; public SortingRecordWriter() throws HyracksDataException { tb = new ArrayTupleBuilder(2); frame = ctx.allocateFrame(); fta = new FrameTupleAppender(ctx.getFrameSize()); fta.reset(frame, true);//from www . ja v a2 s . 
c o m } public void initBlock(int blockId) throws HyracksDataException { runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories, helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit); this.blockId = blockId; } @Override public void close(TaskAttemptContext arg0) throws IOException, InterruptedException { } @Override public void write(K2 key, V2 value) throws IOException, InterruptedException { DataOutput dos = tb.getDataOutput(); tb.reset(); key.write(dos); tb.addFieldEndOffset(); value.write(dos); tb.addFieldEndOffset(); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { runGen.nextFrame(frame); fta.reset(frame, true); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + frame.capacity() + ")"); } } } public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException { if (fta.getTupleCount() > 0) { runGen.nextFrame(frame); fta.reset(frame, true); } runGen.close(); IFrameWriter delegatingWriter = new IFrameWriter() { private final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize()); private final ByteBuffer outFrame = ctx.allocateFrame(); private final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(), helper.getMapOutputRecordDescriptorWithoutExtraFields()); private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3); @Override public void open() throws HyracksDataException { appender.reset(outFrame, true); } @Override public void nextFrame(ByteBuffer buffer) throws HyracksDataException { fta.reset(buffer); int n = fta.getTupleCount(); for (int i = 0; i < n; ++i) { tb.reset(); tb.addField(fta, i, 0); tb.addField(fta, i, 1); try { tb.getDataOutput().writeInt(blockId); } catch (IOException e) { throw new HyracksDataException(e); } tb.addFieldEndOffset(); if 
(!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { FrameUtils.flushFrame(outFrame, writer); appender.reset(outFrame, true); if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new IllegalStateException(); } } } } @Override public void close() throws HyracksDataException { if (appender.getTupleCount() > 0) { FrameUtils.flushFrame(outFrame, writer); } } @Override public void fail() throws HyracksDataException { // TODO Auto-generated method stub } }; if (helper.hasCombiner()) { Reducer<K2, V2, K2, V2> combiner = helper.getCombiner(); TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0); TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId); final IFrameWriter outputWriter = delegatingWriter; RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() { private final FrameTupleAppender fta = new FrameTupleAppender(ctx.getFrameSize()); private final ByteBuffer buffer = ctx.allocateFrame(); private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2); { fta.reset(buffer, true); outputWriter.open(); } @Override public void write(K2 key, V2 value) throws IOException, InterruptedException { DataOutput dos = tb.getDataOutput(); tb.reset(); key.write(dos); tb.addFieldEndOffset(); value.write(dos); tb.addFieldEndOffset(); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { FrameUtils.flushFrame(buffer, outputWriter); fta.reset(buffer, true); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new IllegalStateException(); } } } @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { if (fta.getTupleCount() > 0) { FrameUtils.flushFrame(buffer, outputWriter); outputWriter.close(); } } }; delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper, new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(), 
helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId, ctaskAttemptContext); } IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length]; for (int i = 0; i < comparatorFactories.length; ++i) { comparators[i] = comparatorFactories[i].createBinaryComparator(); } ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getFrameSorter(), runGen.getRuns(), new int[] { 0 }, comparators, null, helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter); merger.process(); } } return new AbstractUnaryOutputSourceOperatorNodePushable() { @Override public void initialize() throws HyracksDataException { writer.open(); try { SortingRecordWriter recordWriter = new SortingRecordWriter(); InputSplit split = null; int blockId = 0; while ((split = isp.next()) != null) { try { RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split, taskAttemptContext); ClassLoader ctxCL = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); recordReader.initialize(split, taskAttemptContext); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } recordWriter.initBlock(blockId); Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId, recordReader, recordWriter, null, null, split); mapper.run(mCtx); recordReader.close(); recordWriter.sortAndFlushBlock(writer); ++blockId; } catch (IOException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } } } finally { writer.close(); } } }; }
From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.ReducerOperatorDescriptor.java
License:Apache License
/**
 * Builds the push runtime that feeds incoming frames to a {@link ReduceWriter} wrapping the
 * Hadoop reducer and the output format's record writer.
 *
 * @param ctx Hyracks task context passed on to the reduce writer
 * @param recordDescProvider not used by this method
 * @param partition partition index, used as the task ID of the reduce attempt
 * @param nPartitions not used by this method
 * @return the sink operator runtime delegating to the reduce writer
 * @throws HyracksDataException if the output format's record writer cannot be obtained
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(mConfig);
    final Reducer<K2, V2, K3, V3> reducer = helper.getReducer();
    final RecordDescriptor recordDescriptor = helper.getMapOutputRecordDescriptor();
    final int[] groupFields = helper.getSortFields();
    IBinaryComparatorFactory[] groupingComparators = helper.getGroupingComparatorFactories();

    final TaskAttemptID taId = new TaskAttemptID("foo", jobId, false, partition, 0);
    final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

    final RecordWriter recordWriter;
    try {
        recordWriter = helper.getOutputFormat().getRecordWriter(taskAttemptContext);
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }

    final ReduceWriter<K2, V2, K3, V3> reduceWriter = new ReduceWriter<K2, V2, K3, V3>(ctx, helper,
            groupFields, groupingComparators, recordDescriptor, reducer, recordWriter, taId,
            taskAttemptContext);

    // Every lifecycle call except fail() is forwarded to the reduce writer.
    return new AbstractUnaryInputSinkOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            reduceWriter.open();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            reduceWriter.nextFrame(buffer);
        }

        @Override
        public void close() throws HyracksDataException {
            reduceWriter.close();
        }

        @Override
        public void fail() throws HyracksDataException {
        }
    };
}
From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java
License:Apache License
public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException { try {//from w ww .j a v a2 s . co m TaskAttemptID tid = new TaskAttemptID("", 0, TaskType.REDUCE, partition, 0); return new TaskAttemptContextImpl(conf, tid); } catch (Exception e) { throw new HyracksDataException(e); } }
From source file:eu.stratosphere.addons.hbase.GenericTableOutputFormat.java
License:Apache License
@Override public void open(int taskNumber, int numTasks) throws IOException { this.hadoopConfig = getHadoopConfig(this.config); /**//from www .j a v a2s . c om * PLASE NOTE: * If you are a Eclipse+Maven Integration user and you have two (or more) warnings here, please * close the pact-hbase project OR set the maven profile to hadoop_yarn * * pact-hbase requires hadoop_yarn, but Eclipse is not able to parse maven profiles properly. Therefore, * it imports the pact-hbase project even if it is not included in the standard profile (hadoop_v1) */ final TaskAttemptID attemptId = new TaskAttemptID(this.jtID, this.jobId, TaskType.MAP, taskNumber - 1, 0); this.context = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(this.hadoopConfig, attemptId); final HFileOutputFormat outFormat = new HFileOutputFormat(); try { this.writer = outFormat.getRecordWriter(this.context); } catch (InterruptedException iex) { throw new IOException("Opening the writer was interrupted.", iex); } }
From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java
License:Apache License
/**
 * Verifies that the record reader created for a {@code GobblinSplit} yields one record per
 * path, keyed by the path's index, and reports exhaustion afterwards.
 */
@Test
public void testRecordReader() throws Exception {
    List<String> paths = Lists.newArrayList("/path1", "/path2");
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
            new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

    // Assert that the reader actually advanced before inspecting the current record.
    Assert.assertTrue(recordReader.nextKeyValue());
    Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");

    Assert.assertTrue(recordReader.nextKeyValue());
    Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");

    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
@Test public void shouldMakeHDFSCompliantSegmentOutputPath() { HadoopIngestionSpec schema;/* ww w. j a va 2 s . c o m*/ try { schema = objectMapper.readValue("{\n" + " \"dataSchema\": {\n" + " \"dataSource\": \"source\",\n" + " \"metricsSpec\": [],\n" + " \"granularitySpec\": {\n" + " \"type\": \"uniform\",\n" + " \"segmentGranularity\": \"hour\",\n" + " \"intervals\": [\"2012-07-10/P1D\"]\n" + " }\n" + " },\n" + " \"ioConfig\": {\n" + " \"type\": \"hadoop\",\n" + " \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n" + " }\n" + "}", HadoopIngestionSpec.class); } catch (Exception e) { throw Throwables.propagate(e); } //DataSchema dataSchema = new DataSchema("dataSource", null, null, Gra) //schema = new HadoopIngestionSpec(dataSchema, ioConfig, HadoopTuningConfig.makeDefaultTuningConfig()); HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig( schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version"))); Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30), 4712); Path path = JobHelper .makeFileNamePath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new DistributedFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time) .get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), JobHelper.INDEX_ZIP, hdfsDataSegmentPusher); Assert.assertEquals( "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_index.zip", path.toString()); path = JobHelper .makeFileNamePath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new DistributedFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time) .get(), 
cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), JobHelper.DESCRIPTOR_JSON, hdfsDataSegmentPusher); Assert.assertEquals( "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_descriptor.json", path.toString()); path = JobHelper .makeTmpPath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new DistributedFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time) .get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0), hdfsDataSegmentPusher); Assert.assertEquals( "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_index.zip.0", path.toString()); }
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
/**
 * Checks the index, descriptor and temporary paths that {@code JobHelper} builds for a
 * segment on a {@code LocalFileSystem}: the expected paths keep the colons of the data
 * source, interval and version, and nest the file under a partition directory.
 */
@Test
public void shouldMakeDefaultSegmentOutputPathIfNotHDFS() {
    final HadoopIngestionSpec schema;

    try {
        schema = objectMapper.readValue(
                "{\n"
                        + " \"dataSchema\": {\n"
                        + " \"dataSource\": \"the:data:source\",\n"
                        + " \"metricsSpec\": [],\n"
                        + " \"granularitySpec\": {\n"
                        + " \"type\": \"uniform\",\n"
                        + " \"segmentGranularity\": \"hour\",\n"
                        + " \"intervals\": [\"2012-07-10/P1D\"]\n"
                        + " }\n"
                        + " },\n"
                        + " \"ioConfig\": {\n"
                        + " \"type\": \"hadoop\",\n"
                        + " \"segmentOutputPath\": \"/tmp/dru:id/data:test\"\n"
                        + " }\n"
                        + "}",
                HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    HadoopDruidIndexerConfig indexerConfig = new HadoopDruidIndexerConfig(
            schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));

    Bucket bucket = new Bucket(4711, new DateTime(2012, 7, 10, 5, 30), 4712);

    Path indexZipPath = JobHelper.makeFileNamePath(
            new Path(indexerConfig.getSchema().getIOConfig().getSegmentOutputPath()),
            new LocalFileSystem(),
            new DataSegment(indexerConfig.getSchema().getDataSchema().getDataSource(),
                    indexerConfig.getSchema().getDataSchema().getGranularitySpec()
                            .bucketInterval(bucket.time).get(),
                    indexerConfig.getSchema().getTuningConfig().getVersion(), null, null, null,
                    new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1),
            JobHelper.INDEX_ZIP,
            new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(), objectMapper));
    Assert.assertEquals(
            "file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:"
                    + "version/4712/index.zip",
            indexZipPath.toString());

    Path descriptorPath = JobHelper.makeFileNamePath(
            new Path(indexerConfig.getSchema().getIOConfig().getSegmentOutputPath()),
            new LocalFileSystem(),
            new DataSegment(indexerConfig.getSchema().getDataSchema().getDataSource(),
                    indexerConfig.getSchema().getDataSchema().getGranularitySpec()
                            .bucketInterval(bucket.time).get(),
                    indexerConfig.getSchema().getTuningConfig().getVersion(), null, null, null,
                    new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1),
            JobHelper.DESCRIPTOR_JSON,
            new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(), objectMapper));
    Assert.assertEquals(
            "file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:"
                    + "version/4712/descriptor.json",
            descriptorPath.toString());

    Path tmpPath = JobHelper.makeTmpPath(
            new Path(indexerConfig.getSchema().getIOConfig().getSegmentOutputPath()),
            new LocalFileSystem(),
            new DataSegment(indexerConfig.getSchema().getDataSchema().getDataSource(),
                    indexerConfig.getSchema().getDataSchema().getGranularitySpec()
                            .bucketInterval(bucket.time).get(),
                    indexerConfig.getSchema().getTuningConfig().getVersion(), null, null, null,
                    new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1),
            new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0),
            new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(), objectMapper));
    Assert.assertEquals(
            "file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:"
                    + "version/4712/index.zip.0",
            tmpPath.toString());
}
From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java
License:Apache License
protected List<AccumuloStorage> createAccumuloStorages(String location) throws IOException, InterruptedException { List<AccumuloStorage> accumuloStorages = new ArrayList<AccumuloStorage>(); AccumuloStorage storage = new AccumuloStorage(); InputFormat inputFormat = storage.getInputFormat(); Job job = new Job(new Configuration()); storage.setLocation(location, job);// www . ja v a2 s . c o m List<InputSplit> splits = inputFormat.getSplits(job); assertNotNull(splits); for (InputSplit inputSplit : splits) { storage = new AccumuloStorage(); job = new Job(new Configuration()); storage.setLocation(location, job); TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID("jtid", 0, false, 0, 0)); RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext); recordReader.initialize(inputSplit, taskAttemptContext); storage.prepareToRead(recordReader, null); accumuloStorages.add(storage); } return accumuloStorages; }