Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
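Before the full examples below, here is a minimal sketch of the most common call pattern: a custom OutputFormat pulls job settings out of the TaskAttemptContext inside getRecordWriter. The class name SketchOutputFormat and the property key "sketch.output.extension" are hypothetical and exist only to show where getConfiguration() fits; this is an illustrative sketch, not code from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical OutputFormat used only to illustrate getConfiguration().
public class SketchOutputFormat extends FileOutputFormat<Text, Text> {

    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task-side view of the job configuration.
        Configuration conf = context.getConfiguration();
        // "sketch.output.extension" is a made-up property name for illustration.
        String extension = conf.get("sketch.output.extension", ".txt");

        Path file = getDefaultWorkFile(context, extension);
        FileSystem fs = file.getFileSystem(conf);
        final FSDataOutputStream out = fs.create(file, false);

        return new RecordWriter<Text, Text>() {
            @Override
            public void write(Text key, Text value) throws IOException {
                // Write records as tab-separated lines.
                out.writeBytes(key + "\t" + value + "\n");
            }

            @Override
            public void close(TaskAttemptContext ctx) throws IOException {
                out.close();
            }
        };
    }
}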

Usage

From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java

License: Apache License

@Test
public void testGetOutputCommitter() {
    try {
        TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(context.getTaskAttemptID()).thenReturn(new TaskAttemptID("200707121733", 1, false, 1, 1));
        Configuration conf = new Configuration();
        Mockito.when(context.getConfiguration()).thenReturn(conf);
        context.getConfiguration().set("mapred.output.dir", "/out");
        Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Unable to get output committer");
    }
}

From source file: com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License: Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    //Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}

From source file: com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License: Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration config, TaskAttemptID taskId) {
    TaskAttemptContext localContext = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(localContext.getConfiguration()).thenReturn(config);
    Mockito.when(localContext.getTaskAttemptID()).thenReturn(taskId);
    return localContext;
}

From source file: com.intel.genomicsdb.GenomicsDBInputFormat.java

License: Open Source License

public RecordReader<String, VCONTEXT> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {

    String loaderJson;
    String queryJson;

    GenomicsDBFeatureReader<VCONTEXT, SOURCE> featureReader;
    GenomicsDBRecordReader<VCONTEXT, SOURCE> recordReader;

    if (taskAttemptContext != null) {
        Configuration configuration = taskAttemptContext.getConfiguration();
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    } else {
        // If control comes here, means this method is called from
        // GenomicsDBRDD. Hence, the configuration object must be
        // set by setConf method, else this will lead to
        // NullPointerException
        assert (configuration != null);
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    }

    featureReader = new GenomicsDBFeatureReader<VCONTEXT, SOURCE>(loaderJson, queryJson,
            (FeatureCodec<VCONTEXT, SOURCE>) new BCF2Codec());
    recordReader = new GenomicsDBRecordReader<VCONTEXT, SOURCE>(featureReader);
    return recordReader;
}
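The comment in the branch above notes that, when createRecordReader is invoked without a TaskAttemptContext (for example from GenomicsDBRDD), the Configuration must already have been supplied through setConf. A minimal sketch of that fallback pattern, based on Hadoop's org.apache.hadoop.conf.Configurable interface, might look like the following; the class name SketchConfSource and the resolve helper are hypothetical and only isolate the null-context branch shown above, they are not the GenomicsDB implementation.

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical holder that prefers the task context but falls back to setConf().
public class SketchConfSource implements Configurable {

    private Configuration configuration;

    @Override
    public void setConf(Configuration conf) {
        // Called by framework or driver code before any records are read.
        this.configuration = conf;
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    // Resolve the effective configuration: task context first, stored copy otherwise.
    public Configuration resolve(TaskAttemptContext taskAttemptContext) {
        if (taskAttemptContext != null) {
            return taskAttemptContext.getConfiguration();
        }
        // Without a context, the configuration must have been set via setConf().
        assert configuration != null;
        return configuration;
    }
}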

From source file: com.jbw.tar.sf.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = ".tar";

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);

    // Wrap the raw output stream in a tar record writer.
    return new TarOutputWriter<>(fileOut);
}

From source file: com.jbw.taroutputformat.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac) throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(tac, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    return new TarOutputWriter<>(fileOut);
}

From source file: com.jumptap.h2redis.RedisOutputFormat.java

License: Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RedisHMRecordWriter(context.getConfiguration());
}

From source file: com.knewton.mapreduce.SSTableRecordReader.java

License: Apache License

/**
 * Performs all the necessary actions to initialize and prepare this record reader.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.ctx = context;
    conf = context.getConfiguration();
    keysRead = 0;
    components = Sets.newHashSetWithExpectedSize(3);
    FileSplit split = (FileSplit) inputSplit;
    validateConfiguration(conf);

    // Get comparator. Subcomparator can be null.
    AbstractType<?> comparator = getConfComparator(conf);
    AbstractType<?> subcomparator = getConfSubComparator(conf);

    // Get partitioner for keys
    IPartitioner partitioner = getConfPartitioner(conf);

    // Move minimum required db tables to local disk.
    Path dataTablePath = split.getPath();
    FileSystem remoteFS = FileSystem.get(dataTablePath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    CFMetaData cfMetaData;
    if (getConfIsSparse(conf)) {
        cfMetaData = CFMetaData.sparseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator);
    } else {
        cfMetaData = CFMetaData.denseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator,
                subcomparator);
    }
    // Open table and get scanner
    SSTableReader tableReader = openSSTableReader(partitioner, cfMetaData);
    setTableScanner(tableReader);
}

From source file: com.knewton.mapreduce.SSTableRecordReader.java

License: Apache License

/**
 * Copies the minimum set of table files that the table reader needs to local disk.
 *
 * @param dataTablePath The data table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();
    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);
    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }
    components.add(Component.DATA);
    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }
    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}

From source file: com.knewton.mapreduce.SSTableRecordReader.java

License: Apache License

/**
 * Decompresses input files that were snappy compressed before opening them with the sstable
 * reader. It writes a new decompressed file with the same name as the compressed one. The old
 * one gets deleted.
 */
private void decompress(Path localTablePath, TaskAttemptContext context) throws IOException {
    context.setStatus(String.format("Decompressing %s", localTablePath.toUri()));
    int compressionBufSize = context.getConfiguration().getInt(PropertyConstants.DECOMPRESS_BUFFER.txt,
            DEFAULT_DECOMPRESS_BUFFER_SIZE);
    compressionBufSize *= 1024;
    LOG.info("Decompressing {} with buffer size {}.", localTablePath, compressionBufSize);
    File compressedFile = new File(localTablePath.toString());
    InputStream fis = new FileInputStream(compressedFile);
    InputStream bis = new BufferedInputStream(fis, compressionBufSize);
    InputStream sip = new SnappyInputStream(bis);
    File decompressedFile = new File(localTablePath.toString() + ".tmp");

    OutputStream os = new FileOutputStream(decompressedFile);
    OutputStream bos = new BufferedOutputStream(os, compressionBufSize);
    byte[] inByteArr = new byte[compressionBufSize];
    int bytesRead = 0;
    int bytesSinceLastReport = 0;
    while ((bytesRead = sip.read(inByteArr)) > 0) {
        bos.write(inByteArr, 0, bytesRead);
        bytesSinceLastReport += bytesRead;
        // Avoid timeouts. Report progress to the jobtracker.
        if (bytesSinceLastReport % REPORT_DECOMPRESS_PROGRESS_EVERY_GBS > 0) {
            context.setStatus(String.format("Decompressed %d bytes.", bytesSinceLastReport));
            bytesSinceLastReport -= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS;
        }
    }
    sip.close();
    bos.close();
    compressedFile.delete();
    decompressedFile.renameTo(compressedFile);
}