Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

This page collects example usages of the TaskAttemptID constructor of org.apache.hadoop.mapreduce.TaskAttemptID.

Prototype

@Deprecated
public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)

Source Link

Document

Constructs a TaskAttemptID object from the given parts.

Usage

From source file:com.twitter.scalding.parquet.scrooge.ParquetScroogeSchemeTest.java

License:Apache License

/**
 * Writes the given Thrift records into a single Parquet test file.
 *
 * <p>All records are serialized back-to-back with the compact protocol into one in-memory
 * buffer, and that buffer is handed to the Parquet writer as a single {@code BytesWritable}.
 * The class passed to the writer is taken from the first record.
 *
 * @param recordsToWrite records to serialize; must contain at least one element because the
 *                       write class is read from element 0
 * @param conf           Hadoop configuration used to build the task attempt context
 * @param parquetFile    destination path of the Parquet file
 * @throws IOException                  if writing or closing the file fails
 * @throws InterruptedException         if the writer is interrupted
 * @throws org.apache.thrift.TException if a record cannot be serialized
 */
private void writeParquetFile(List<TBase> recordsToWrite, Configuration conf, Path parquetFile)
        throws IOException, InterruptedException, org.apache.thrift.TException {
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    // Fixed attempt id: job tracker "local", job 0, map task 0, attempt 0.
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordsToWrite.get(0).getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    try {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
        for (TBase recordToWrite : recordsToWrite) {
            recordToWrite.write(protocol);
        }
        w.write(new BytesWritable(baos.toByteArray()));
    } finally {
        // Close even when serialization or the write fails, so the file writer is not leaked.
        w.close();
    }
}

From source file:cz.seznam.euphoria.hadoop.HadoopUtils.java

License:Apache License

/**
 * Builds a {@link TaskAttemptContext} for the given task number.
 *
 * <p>Apart from the task number, every part of the attempt id is a hard-coded default:
 * job tracker id {@code "0"}, job number 0, task type REDUCE and attempt 0.
 *
 * @param conf       configuration the context is created with
 * @param taskNumber task id placed into the attempt id
 * @return a new task attempt context
 */
public static TaskAttemptContext createTaskContext(Configuration conf, int taskNumber) {
    // TODO uses some default hard-coded values
    final String jobTrackerId = "0";
    final int jobNumber = 0;
    final int attemptNumber = 0;
    return new TaskAttemptContextImpl(conf,
            new TaskAttemptID(jobTrackerId, jobNumber, TaskType.REDUCE, taskNumber, attemptNumber));
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java

License:Apache License

/**
 * Builds the push runtime that runs the Hadoop map phase for one partition.
 *
 * <p>The returned operator iterates over the input splits supplied for {@code partition}.
 * Each split is treated as one block: it is read through the configured input format and fed
 * to the mapper; the mapper output is sorted, optionally passed through the combiner, tagged
 * with the block id and pushed to the operator's output writer.
 *
 * @param ctx                task context used to allocate frames and to obtain the frame size
 * @param recordDescProvider not used by this implementation
 * @param partition          partition index; selects the input split provider and is used as
 *                           the task id of the Hadoop task attempt
 * @param nPartitions        not used by this implementation
 * @return a source operator whose {@code initialize()} performs the whole map phase
 * @throws HyracksDataException if setting up the runtime fails
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(config);
    final Configuration conf = helper.getConfiguration();
    final Mapper<K1, V1, K2, V2> mapper = helper.getMapper();
    final InputFormat<K1, V1> inputFormat = helper.getInputFormat();
    final IInputSplitProvider isp = factory.createInputSplitProvider(partition);
    final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0);
    final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

    final int framesLimit = helper.getSortFrameLimit(ctx);
    final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories();

    /**
     * Record writer handed to the mapper. Every key/value pair is serialized into a two-field
     * tuple, buffered in a frame, and full frames are passed to the sort run generator of the
     * current block.
     */
    class SortingRecordWriter extends RecordWriter<K2, V2> {
        private final ArrayTupleBuilder tb;
        private final ByteBuffer frame;
        private final FrameTupleAppender fta;
        private ExternalSortRunGenerator runGen;
        private int blockId;

        public SortingRecordWriter() throws HyracksDataException {
            tb = new ArrayTupleBuilder(2);
            frame = ctx.allocateFrame();
            fta = new FrameTupleAppender(ctx.getFrameSize());
            fta.reset(frame, true);
        }

        /**
         * Starts a new block: creates a fresh run generator and remembers the id that is
         * appended to every tuple emitted for this block.
         */
        public void initBlock(int blockId) throws HyracksDataException {
            runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit);
            this.blockId = blockId;
        }

        /** No-op: buffered data is flushed explicitly through {@link #sortAndFlushBlock}. */
        @Override
        public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
        }

        /**
         * Appends one key/value tuple to the current frame, handing the frame to the run
         * generator first when it is full.
         *
         * @throws HyracksDataException if a single record does not fit into an empty frame
         */
        @Override
        public void write(K2 key, V2 value) throws IOException, InterruptedException {
            DataOutput dos = tb.getDataOutput();
            tb.reset();
            key.write(dos);
            tb.addFieldEndOffset();
            value.write(dos);
            tb.addFieldEndOffset();
            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                // Frame is full: pass it on, then retry on the emptied frame.
                runGen.nextFrame(frame);
                fta.reset(frame, true);
                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                    throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size ("
                            + frame.capacity() + ")");
                }
            }
        }

        /**
         * Finishes the current block: hands the partially filled frame to the run generator,
         * closes it and merges the sorted runs. Each merged tuple gets the block id appended
         * as a third field before it is forwarded to {@code writer}; when a combiner is
         * configured the merged tuples are routed through it first.
         *
         * @param writer downstream writer that receives the block's output frames
         */
        public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException {
            if (fta.getTupleCount() > 0) {
                runGen.nextFrame(frame);
                fta.reset(frame, true);
            }
            runGen.close();
            // Re-packs (key, value) tuples as (key, value, blockId) and forwards them to writer.
            // Note that fta and tb below shadow the fields of SortingRecordWriter.
            IFrameWriter delegatingWriter = new IFrameWriter() {
                private final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                private final ByteBuffer outFrame = ctx.allocateFrame();
                private final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields());
                private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3);

                @Override
                public void open() throws HyracksDataException {
                    appender.reset(outFrame, true);
                }

                @Override
                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                    fta.reset(buffer);
                    int n = fta.getTupleCount();
                    for (int i = 0; i < n; ++i) {
                        tb.reset();
                        tb.addField(fta, i, 0);
                        tb.addField(fta, i, 1);
                        try {
                            tb.getDataOutput().writeInt(blockId);
                        } catch (IOException e) {
                            throw new HyracksDataException(e);
                        }
                        tb.addFieldEndOffset();
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outFrame, writer);
                            appender.reset(outFrame, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }
                }

                @Override
                public void close() throws HyracksDataException {
                    // Flush whatever is left in the output frame.
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outFrame, writer);
                    }
                }

                @Override
                public void fail() throws HyracksDataException {
                    // TODO: failures are currently not propagated to the downstream writer.
                }
            };
            if (helper.hasCombiner()) {
                Reducer<K2, V2, K2, V2> combiner = helper.getCombiner();
                TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0);
                // Build the combiner's context from the combiner's own attempt id (same
                // values as taId) so that id and context passed to ReduceWriter agree.
                TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(ctaId);
                final IFrameWriter outputWriter = delegatingWriter;
                // Receives the combiner output and frames it for the block-id tagging writer.
                RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() {
                    private final FrameTupleAppender fta = new FrameTupleAppender(ctx.getFrameSize());
                    private final ByteBuffer buffer = ctx.allocateFrame();
                    private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);

                    {
                        fta.reset(buffer, true);
                        outputWriter.open();
                    }

                    @Override
                    public void write(K2 key, V2 value) throws IOException, InterruptedException {
                        DataOutput dos = tb.getDataOutput();
                        tb.reset();
                        key.write(dos);
                        tb.addFieldEndOffset();
                        value.write(dos);
                        tb.addFieldEndOffset();
                        if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            fta.reset(buffer, true);
                            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }

                    @Override
                    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                        // NOTE(review): outputWriter is only closed when tuples are still
                        // buffered here; confirm whether it should be closed unconditionally
                        // to pair with the open() in the initializer.
                        if (fta.getTupleCount() > 0) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            outputWriter.close();
                        }
                    }
                };
                // From here on the merger output goes to the ReduceWriter, which is given the
                // combiner and the record writer above.
                delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper,
                        new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId,
                        ctaskAttemptContext);
            }
            IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
            for (int i = 0; i < comparatorFactories.length; ++i) {
                comparators[i] = comparatorFactories[i].createBinaryComparator();
            }
            ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getFrameSorter(),
                    runGen.getRuns(), new int[] { 0 }, comparators, null,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter);
            merger.process();
        }
    }

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        /**
         * Runs the mapper over every split of this partition, one block per split, and closes
         * the output writer when done or on failure.
         */
        @Override
        public void initialize() throws HyracksDataException {
            writer.open();
            try {
                SortingRecordWriter recordWriter = new SortingRecordWriter();
                InputSplit split = null;
                int blockId = 0;
                while ((split = isp.next()) != null) {
                    try {
                        RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split,
                                taskAttemptContext);
                        try {
                            // Initialize the reader with this class's loader as the thread
                            // context class loader, then restore the previous one.
                            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
                            try {
                                Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
                                recordReader.initialize(split, taskAttemptContext);
                            } finally {
                                Thread.currentThread().setContextClassLoader(ctxCL);
                            }
                            recordWriter.initBlock(blockId);
                            Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId,
                                    recordReader, recordWriter, null, null, split);
                            mapper.run(mCtx);
                        } finally {
                            // Release the reader even when initialization or the mapper fails.
                            recordReader.close();
                        }
                        recordWriter.sortAndFlushBlock(writer);
                        ++blockId;
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to callers further up the stack.
                        Thread.currentThread().interrupt();
                        throw new HyracksDataException(e);
                    }
                }
            } finally {
                writer.close();
            }
        }
    };
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.ReducerOperatorDescriptor.java

License:Apache License

/**
 * Builds the push runtime that runs the Hadoop reduce phase for one partition.
 *
 * <p>Incoming frames are forwarded to a {@code ReduceWriter} that is given the reducer and
 * the record writer obtained from the configured output format.
 *
 * @param ctx                task context passed on to the reduce writer
 * @param recordDescProvider not used by this implementation
 * @param partition          partition index, used as the task id of the Hadoop task attempt
 * @param nPartitions        not used by this implementation
 * @return a sink operator that delegates open/nextFrame/close to the reduce writer
 * @throws HyracksDataException if the output record writer cannot be obtained
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(mConfig);
    final Reducer<K2, V2, K3, V3> reducer = helper.getReducer();
    final RecordDescriptor mapOutputDescriptor = helper.getMapOutputRecordDescriptor();
    final int[] groupFields = helper.getSortFields();
    IBinaryComparatorFactory[] groupComparators = helper.getGroupingComparatorFactories();

    // Attempt id of a non-map task ("foo", jobId, isMap = false, partition, attempt 0).
    final TaskAttemptID attemptId = new TaskAttemptID("foo", jobId, false, partition, 0);
    final TaskAttemptContext attemptContext = helper.createTaskAttemptContext(attemptId);

    final RecordWriter outputRecordWriter;
    try {
        outputRecordWriter = helper.getOutputFormat().getRecordWriter(attemptContext);
    } catch (Exception cause) {
        throw new HyracksDataException(cause);
    }

    final ReduceWriter<K2, V2, K3, V3> reduceWriter = new ReduceWriter<K2, V2, K3, V3>(ctx, helper,
            groupFields, groupComparators, mapOutputDescriptor, reducer, outputRecordWriter, attemptId,
            attemptContext);

    // Thin adapter: every frame-level call is handed straight to the reduce writer.
    return new AbstractUnaryInputSinkOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            reduceWriter.open();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            reduceWriter.nextFrame(buffer);
        }

        @Override
        public void close() throws HyracksDataException {
            reduceWriter.close();
        }

        @Override
        public void fail() throws HyracksDataException {
            // Nothing is done on failure.
        }
    };
}

From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java

License:Apache License

/**
 * Creates a task attempt context for the given partition.
 *
 * <p>The attempt id uses an empty job tracker identifier, job 0, task type REDUCE, the
 * partition as task id and attempt 0.
 *
 * @param conf      configuration the context is created with
 * @param partition partition index used as the task id
 * @return a new task attempt context
 * @throws HyracksDataException wrapping any exception raised while building the context
 */
public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
    try {
        return new TaskAttemptContextImpl(conf, new TaskAttemptID("", 0, TaskType.REDUCE, partition, 0));
    } catch (Exception failure) {
        throw new HyracksDataException(failure);
    }
}

From source file:eu.stratosphere.addons.hbase.GenericTableOutputFormat.java

License:Apache License

@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.hadoopConfig = getHadoopConfig(this.config);

    /**//from  www  .j a  v a2s  . c om
     * PLASE NOTE:
     * If you are a Eclipse+Maven Integration user and you have two (or more) warnings here, please
     * close the pact-hbase project OR set the maven profile to hadoop_yarn
     * 
     * pact-hbase requires hadoop_yarn, but Eclipse is not able to parse maven profiles properly. Therefore,
     * it imports the pact-hbase project even if it is not included in the standard profile (hadoop_v1)
     */
    final TaskAttemptID attemptId = new TaskAttemptID(this.jtID, this.jobId, TaskType.MAP, taskNumber - 1, 0);

    this.context = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(this.hadoopConfig, attemptId);
    final HFileOutputFormat outFormat = new HFileOutputFormat();
    try {
        this.writer = outFormat.getRecordWriter(this.context);
    } catch (InterruptedException iex) {
        throw new IOException("Opening the writer was interrupted.", iex);
    }
}

From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java

License:Apache License

/**
 * Checks that the record reader of a split emits one record per path, keyed by the
 * zero-based position of the path, and reports the end of input afterwards.
 */
@Test
public void testRecordReader() throws Exception {
    final String[] expectedPaths = { "/path1", "/path2" };

    List<String> paths = Lists.newArrayList(expectedPaths);
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
            new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

    // One record per path: the key is the index, the value is the path itself.
    for (int index = 0; index < expectedPaths.length; index++) {
        recordReader.nextKeyValue();
        Assert.assertEquals(recordReader.getCurrentKey().get(), index);
        Assert.assertEquals(recordReader.getCurrentValue().toString(), expectedPaths[index]);
    }

    // No further records once every path has been consumed.
    Assert.assertFalse(recordReader.nextKeyValue());
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java

License:Apache License

/**
 * Checks the index, descriptor and temporary paths produced for a segment on a distributed
 * file system: the expected paths use compact timestamps and have the colons of the version
 * replaced by underscores.
 */
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath() {
    final String specJson = "{\n"
            + "    \"dataSchema\": {\n"
            + "        \"dataSource\": \"source\",\n"
            + "        \"metricsSpec\": [],\n"
            + "        \"granularitySpec\": {\n"
            + "            \"type\": \"uniform\",\n"
            + "            \"segmentGranularity\": \"hour\",\n"
            + "            \"intervals\": [\"2012-07-10/P1D\"]\n"
            + "        }\n"
            + "    },\n"
            + "    \"ioConfig\": {\n"
            + "        \"type\": \"hadoop\",\n"
            + "        \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n"
            + "    }\n"
            + "}";

    final HadoopIngestionSpec spec;
    try {
        spec = objectMapper.readValue(specJson, HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(
            spec.withTuningConfig(spec.getTuningConfig().withVersion("some:brand:new:version")));
    Bucket bucket = new Bucket(4711, new DateTime(2012, 7, 10, 5, 30), 4712);

    // Directory part shared by all three expected paths.
    final String expectedDir =
            "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version";

    // Index file.
    Path outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    DistributedFileSystem fs = new DistributedFileSystem();
    DataSegment segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    Path path = JobHelper.makeFileNamePath(outputRoot, fs, segment, JobHelper.INDEX_ZIP,
            hdfsDataSegmentPusher);
    Assert.assertEquals(expectedDir + "/4712_index.zip", path.toString());

    // Descriptor file.
    outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    fs = new DistributedFileSystem();
    segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    path = JobHelper.makeFileNamePath(outputRoot, fs, segment, JobHelper.DESCRIPTOR_JSON,
            hdfsDataSegmentPusher);
    Assert.assertEquals(expectedDir + "/4712_descriptor.json", path.toString());

    // Temporary file of task attempt 0.
    outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    fs = new DistributedFileSystem();
    segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    TaskAttemptID attemptId = new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0);
    path = JobHelper.makeTmpPath(outputRoot, fs, segment, attemptId, hdfsDataSegmentPusher);
    Assert.assertEquals(expectedDir + "/4712_index.zip.0", path.toString());
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java

License:Apache License

/**
 * Checks the index, descriptor and temporary paths produced for a segment on a local file
 * system: the expected paths keep the colons of the data source, timestamps and version.
 */
@Test
public void shouldMakeDefaultSegmentOutputPathIfNotHDFS() {
    final String specJson = "{\n"
            + "    \"dataSchema\": {\n"
            + "        \"dataSource\": \"the:data:source\",\n"
            + "        \"metricsSpec\": [],\n"
            + "        \"granularitySpec\": {\n"
            + "            \"type\": \"uniform\",\n"
            + "            \"segmentGranularity\": \"hour\",\n"
            + "            \"intervals\": [\"2012-07-10/P1D\"]\n"
            + "        }\n"
            + "    },\n"
            + "    \"ioConfig\": {\n"
            + "        \"type\": \"hadoop\",\n"
            + "        \"segmentOutputPath\": \"/tmp/dru:id/data:test\"\n"
            + "    }\n"
            + "}";

    final HadoopIngestionSpec spec;
    try {
        spec = objectMapper.readValue(specJson, HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(
            spec.withTuningConfig(spec.getTuningConfig().withVersion("some:brand:new:version")));
    Bucket bucket = new Bucket(4711, new DateTime(2012, 7, 10, 5, 30), 4712);

    // Directory part shared by all three expected paths.
    final String expectedDir =
            "file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:version/4712";

    // Index file.
    Path outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    LocalFileSystem fs = new LocalFileSystem();
    DataSegment segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    LocalDataSegmentPusher pusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(),
            objectMapper);
    Path path = JobHelper.makeFileNamePath(outputRoot, fs, segment, JobHelper.INDEX_ZIP, pusher);
    Assert.assertEquals(expectedDir + "/index.zip", path.toString());

    // Descriptor file.
    outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    fs = new LocalFileSystem();
    segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    pusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(), objectMapper);
    path = JobHelper.makeFileNamePath(outputRoot, fs, segment, JobHelper.DESCRIPTOR_JSON, pusher);
    Assert.assertEquals(expectedDir + "/descriptor.json", path.toString());

    // Temporary file of task attempt 0.
    outputRoot = new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath());
    fs = new LocalFileSystem();
    segment = new DataSegment(cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(), null, null, null,
            new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1);
    TaskAttemptID attemptId = new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0);
    pusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig(), objectMapper);
    path = JobHelper.makeTmpPath(outputRoot, fs, segment, attemptId, pusher);
    Assert.assertEquals(expectedDir + "/index.zip.0", path.toString());
}

From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java

License:Apache License

/**
 * Creates one {@link AccumuloStorage} per input split of the given location, each of them
 * prepared to read from its own initialized record reader.
 *
 * @param location location string passed to {@code setLocation} of every storage
 * @return the storages, in split order
 * @throws IOException          if computing the splits or setting up a reader fails
 * @throws InterruptedException if computing the splits or setting up a reader is interrupted
 */
protected List<AccumuloStorage> createAccumuloStorages(String location)
        throws IOException, InterruptedException {
    // A first storage/job pair is only used to obtain the input format and the splits.
    AccumuloStorage splitSource = new AccumuloStorage();
    InputFormat inputFormat = splitSource.getInputFormat();
    Job splitJob = new Job(new Configuration());
    splitSource.setLocation(location, splitJob);
    List<InputSplit> splits = inputFormat.getSplits(splitJob);
    assertNotNull(splits);

    List<AccumuloStorage> accumuloStorages = new ArrayList<AccumuloStorage>();
    for (InputSplit split : splits) {
        // Every split gets its own storage and job configuration.
        AccumuloStorage splitStorage = new AccumuloStorage();
        Job readJob = new Job(new Configuration());
        splitStorage.setLocation(location, readJob);

        TaskAttemptContext attemptContext = new TaskAttemptContextImpl(readJob.getConfiguration(),
                new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader reader = inputFormat.createRecordReader(split, attemptContext);
        reader.initialize(split, attemptContext);

        splitStorage.prepareToRead(reader, null);
        accumuloStorages.add(splitStorage);
    }
    return accumuloStorages;
}