Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
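
Before the per-file examples, here is a minimal, self-contained sketch of the call in context. The mapper class and field names are invented for illustration and do not come from any of the source files below; the API calls themselves (Mapper.setup, TaskAttemptContext.getTaskAttemptID, TaskAttemptID.getJobID, TaskID.getId) are standard Hadoop.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical mapper that tags each record with the attempt that produced it.
public class AttemptTaggingMapper extends Mapper<LongWritable, Text, Text, Text> {

    private Text attemptId;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID id = context.getTaskAttemptID();
        attemptId = new Text(id.toString()); // e.g. attempt_200707121733_0001_m_000000_0
        // The attempt ID decomposes into its job and task components.
        System.out.println("job = " + id.getJobID() + ", task = " + id.getTaskID().getId());
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(attemptId, value);
    }
}

Because the string form of the ID is unique per attempt, several of the examples below use it directly: as a map key (GobblinOutputCommitter), as a temporary file suffix (BlurOutputFormat), and as the StoreFile.BULKLOAD_TASK_KEY file-info value (the HFile output formats).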

Usage

From source file: gobblin.runtime.mapreduce.GobblinOutputCommitter.java

License: Apache License

@Override
public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
    return this.attemptIdToMultiTaskAttempt.containsKey(arg0.getTaskAttemptID().toString());
}

From source file: gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java

License: Open Source License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config.  Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }

                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, make sure its output directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // Rolling can only happen once the current row is finished, though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HFile writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HFile writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Remember the row so we can tell when a row transition occurs.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }

                        // A bare getWriterFactoryNoCache(conf).create() has no output path to
                        // write to; create the writer against a unique file in the family
                        // directory instead (matching the input_format variant below).
                        return HFile.getWriterFactory(conf).createWriter(fs,
                                StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                                KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };

                writers.put(key, writer);
            }

            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}

From source file: input_format.MultiHFileOutputFormat.java

License: Open Source License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config.  Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }

                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, make sure its output directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // Rolling can only happen once the current row is finished, though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HFile writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HFile writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Remember the row so we can tell when a row transition occurs.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }

                        return HFile.getWriterFactory(conf).createWriter(fs,
                                StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                                KeyValue.KEY_COMPARATOR);

                        // return new HFile.Writer(fs,  StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };

                writers.put(key, writer);
            }

            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}

From source file: kafka.bridge.hadoop.KafkaOutputFormat.java

License: Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    // Is there a programmatic way to get the temp dir? I see it hardcoded everywhere in Hadoop, Hive, and Pig.
    return new FileOutputCommitter(
            new Path("/tmp/" + taskAttemptContext.getTaskAttemptID().getJobID().toString()),
            taskAttemptContext);
}

From source file: org.apache.avro.mapreduce.TestAvroKeyOutputFormat.java

License: Apache License

/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0"))
            .anyTimes();
    expect(context.getNumReduceTasks()).andReturn(1);

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
            AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema), anyObject(GenericData.class),
            capture(capturedCodecFactory), // Capture for comparison later.
            anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);
    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}

From source file: org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License: Apache License

@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    int numReduceTasks = context.getNumReduceTasks();
    TaskAttemptID taskAttemptID = context.getTaskAttemptID();
    return !(taskAttemptID.isMap() && numReduceTasks != 0);
}

From source file: org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License: Apache License

private Conf setup(TaskAttemptContext context) throws IOException {
    LOG.info("Setting up committer with task attempt [{0}]", context.getTaskAttemptID().toString());
    Conf conf = new Conf();
    conf._configuration = context.getConfiguration();
    conf._tableDescriptor = BlurOutputFormat.getTableDescriptor(conf._configuration);
    int shardCount = conf._tableDescriptor.getShardCount();
    int attemptId = context.getTaskAttemptID().getTaskID().getId();
    int shardId = attemptId % shardCount;
    conf._taskAttemptID = context.getTaskAttemptID();
    Path tableOutput = BlurOutputFormat.getOutputPath(conf._configuration);
    String shardName = ShardUtil.getShardName(BlurConstants.SHARD_PREFIX, shardId);
    conf._indexPath = new Path(tableOutput, shardName);
    conf._newIndex = new Path(conf._indexPath, conf._taskAttemptID.toString() + ".tmp");
    return conf;
}

From source file: org.apache.blur.mapreduce.lib.BlurOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, BlurMutate> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    int id = context.getTaskAttemptID().getTaskID().getId();
    TaskAttemptID taskAttemptID = context.getTaskAttemptID();
    final GenericBlurRecordWriter writer = new GenericBlurRecordWriter(context.getConfiguration(), id,
            taskAttemptID.toString() + ".tmp");
    return new RecordWriter<Text, BlurMutate>() {

        @Override
        public void write(Text key, BlurMutate value) throws IOException, InterruptedException {
            writer.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.close();
        }
    };
}

From source file: org.apache.crunch.impl.mr.run.CrunchRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.crunchSplit = (CrunchInputSplit) inputSplit;
    this.context = context;
    Configuration conf = crunchSplit.getConf();
    if (conf == null) {
        conf = context.getConfiguration();
        crunchSplit.setConf(conf);
    }
    if (crunchSplit.getInputSplit() instanceof CombineFileSplit) {
        combineFileSplit = (CombineFileSplit) crunchSplit.getInputSplit();
    }
    if (curReader != null) {
        curReader.initialize(getDelegateSplit(),
                TaskAttemptContextFactory.create(conf, context.getTaskAttemptID()));
    }
}

From source file: org.apache.crunch.io.hbase.HFileOutputFormatForCrunch.java

License: Apache License

@Override
public RecordWriter<Object, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getDefaultWorkFile(context, "");
    Configuration conf = context.getConfiguration();
    FileSystem fs = outputPath.getFileSystem(conf);

    final boolean compactionExclude = conf.getBoolean(COMPACTION_EXCLUDE_CONF_KEY, false);

    String hcolStr = conf.get(HCOLUMN_DESCRIPTOR_KEY);
    if (hcolStr == null) {
        throw new AssertionError(HCOLUMN_DESCRIPTOR_KEY + " is not set in conf");
    }
    byte[] hcolBytes;
    try {
        hcolBytes = Hex.decodeHex(hcolStr.toCharArray());
    } catch (DecoderException e) {
        throw new AssertionError("Bad hex string: " + hcolStr);
    }
    HColumnDescriptor hcol = new HColumnDescriptor();
    hcol.readFields(new DataInputStream(new ByteArrayInputStream(hcolBytes)));
    LOG.info("Output path: " + outputPath);
    LOG.info("HColumnDescriptor: " + hcol.toString());
    final HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withPath(fs, outputPath)
            .withBlockSize(hcol.getBlocksize()).withCompression(hcol.getCompression())
            .withComparator(KeyValue.KEY_COMPARATOR)
            .withDataBlockEncoder(new HFileDataBlockEncoderImpl(hcol.getDataBlockEncoding()))
            .withChecksumType(Store.getChecksumType(conf)).withBytesPerChecksum(Store.getBytesPerChecksum(conf))
            .create();

    return new RecordWriter<Object, KeyValue>() {
        @Override
        public void write(Object row, KeyValue kv) throws IOException {
            if (kv.getTimestamp() == HConstants.LATEST_TIMESTAMP) {
                kv.updateLatestStamp(now);
            }
            writer.append(kv);
            trt.includeTimestamp(kv);
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
            writer.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                    Bytes.toBytes(context.getTaskAttemptID().toString()));
            writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
            writer.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                    Bytes.toBytes(compactionExclude));
            writer.appendFileInfo(StoreFile.TIMERANGE_KEY, WritableUtils.toByteArray(trt));
            writer.close();
        }
    };
}