Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce TaskAttemptContext#getTaskAttemptID method, drawn from open-source projects.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
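
Before the collected examples, here is a minimal sketch of a common use of getTaskAttemptID: deriving a per-attempt file name inside an OutputFormat or RecordWriter so that retried or speculative attempts never collide on the same path. The snippet is illustrative only and is not taken from any of the source files below; the helper name and path layout are assumptions.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical helper: builds a work-file path that is unique per task attempt.
public static Path uniqueWorkFile(Path workDir, TaskAttemptContext context) {
    // toString() yields a name such as "attempt_200707121733_0001_m_000001_1"
    return new Path(workDir, "part-" + context.getTaskAttemptID().toString());
}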

Usage

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
    TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(context.getConfiguration()).thenReturn(conf);
    TaskAttemptID taskId = new TaskAttemptID("200707121733", 1, false, 1, 1);
    Mockito.when(context.getTaskAttemptID()).thenReturn(taskId);
    return context;
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java

License:Apache License

@Test
public void testGetOutputCommitter() {
    try {
        TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(context.getTaskAttemptID()).thenReturn(new TaskAttemptID("200707121733", 1, false, 1, 1));
        Configuration conf = new Configuration();
        Mockito.when(context.getConfiguration()).thenReturn(conf);
        context.getConfiguration().set("mapred.output.dir", "/out");
        Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Unable to get output committer");
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License:Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    //Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
        FileStatus[] fileStatus = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration config, TaskAttemptID taskId) {
    TaskAttemptContext localContext = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(localContext.getConfiguration()).thenReturn(config);
    Mockito.when(localContext.getTaskAttemptID()).thenReturn(taskId);
    return localContext;
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileOutputFormat.java

License:Apache License

/**
 * Retrieve a record writer for this OutputFormat. There are three config properties that are supported:
 * com.metamx.milano.hadoop.filePrefix -- A string to prefix the written file names with.
 * com.metamx.milano.hadoop.filePath   -- A string to postfix on the path. This lets you specify a subdirectory in which to put the files.
 * com.metamx.milano.proto.descriptor.base64 -- A string representing a base64 encoded DescriptorProto converted to bytes.
 * This is overridden if the metadata has already been set.
 *
 * @param job The {@link TaskAttemptContext} to use. See above for specific options.
 *
 * @return A {@link RecordWriter}
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<K, Message> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    log.debug("Retrieving record writer");
    Configuration conf = job.getConfiguration();

    String prefix = conf.get("com.metamx.milano.hadoop.filePrefix", "");
    String path = conf.get("com.metamx.milano.hadoop.filePath", ".");

    if (metadata == null) {
        String descriptorBytes = conf.get("com.metamx.milano.proto.descriptor.base64");
        if (descriptorBytes != null) {
            metadata = MilanoTool.withBase64(descriptorBytes).getMetadata();
        }
    }

    String filename = "";
    if (!prefix.equals("")) {
        filename = filename.concat(prefix + "_");
    }
    filename = filename.concat(job.getTaskAttemptID().getTaskID().toString());
    Path directory = new Path(((FileOutputCommitter) getOutputCommitter(job)).getWorkPath(), path);

    Path file = new Path(directory, filename);
    FileSystem fs = file.getFileSystem(conf);

    final OutputStream outputStream = fs.create(file);

    return new RecordWriter<K, Message>() {
        private MilanoProtoFile.Writer writer = MilanoProtoFile.createWriter(outputStream, metadata);

        @Override
        public void write(K key, Message value) throws IOException, InterruptedException {
            writer.write(value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.flush();
            writer.close();
            log.debug("Closed Writer");
        }
    };
}
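
The Javadoc above lists three configuration properties that getRecordWriter reads. As a hedged sketch (the method name, prefix, subdirectory, job name, and the base64 value below are placeholders, not values from the source), a driver could set them like this before submitting the job:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

private static Job configureMilanoOutput() throws IOException {
    Configuration conf = new Configuration();
    // Prefix prepended to the written file names.
    conf.set("com.metamx.milano.hadoop.filePrefix", "events");
    // Subdirectory (under the committer work path) in which to place the files.
    conf.set("com.metamx.milano.hadoop.filePath", "proto");
    // Base64-encoded DescriptorProto bytes; "<base64-descriptor>" is a placeholder.
    conf.set("com.metamx.milano.proto.descriptor.base64", "<base64-descriptor>");
    return Job.getInstance(conf, "milano-proto-output");
}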

From source file:com.nearinfinity.blur.mapreduce.BlurTask.java

License:Apache License

public String getShardName(TaskAttemptContext context) {
    TaskAttemptID taskAttemptID = context.getTaskAttemptID();
    int id = taskAttemptID.getTaskID().getId();
    return BlurUtil.getShardName(BlurConstants.SHARD_PREFIX, id);
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

private static int getTaskId(TaskAttemptContext context) {
    return context.getTaskAttemptID().getTaskID().getId();
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

private static int getAttemptId(TaskAttemptContext context) {
    return context.getTaskAttemptID().getId();
}
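
Taken together, the two helpers above distinguish the task number from the attempt number. The following snippet is illustrative only (the IDs are made up, and it uses the same deprecated boolean constructor seen elsewhere on this page) and shows which component each accessor returns:

import org.apache.hadoop.mapreduce.TaskAttemptID;

private static void showIdComponents() {
    // attempt_200707121733_0001_m_000003_2 -> task 3, attempt 2
    TaskAttemptID attemptId = new TaskAttemptID("200707121733", 1, true, 3, 2);
    int taskNumber = attemptId.getTaskID().getId(); // 3: which input split / partition the task owns
    int attemptNumber = attemptId.getId();          // 2: which retry or speculative attempt this is
    System.out.println(taskNumber + " / " + attemptNumber);
}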

From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License:Apache License

private static Set<String> commitTask(S3MultipartOutputCommitter committer, TaskAttemptContext tac,
        int numFiles) throws IOException {
    Path attemptPath = committer.getTaskAttemptPath(tac);

    Set<String> files = Sets.newHashSet();
    for (int i = 0; i < numFiles; i += 1) {
        Path outPath = writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(),
                10 * (i + 1));
        files.add(KEY_PREFIX + "/" + outPath.getName() + "-" + committer.getUUID());
    }

    committer.commitTask(tac);

    return files;
}

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java

License:Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new StoreFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}