Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
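The examples below mock or consume this method from InputFormat, OutputFormat, and RecordReader implementations. As a minimal orientation before the real-world samples, here is a sketch of the most common pattern: a custom RecordReader pulling job settings from the task context during initialize(). The class and the property name "example.buffer.size" are hypothetical placeholders used only for illustration, not part of any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: reading job configuration inside a custom RecordReader.
// "example.buffer.size" is a made-up property key, shown only to illustrate the call.
public class ExampleRecordReader extends RecordReader<LongWritable, Text> {

    private int bufferSize;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task context exposes the job Configuration for this attempt.
        Configuration conf = context.getConfiguration();
        bufferSize = conf.getInt("example.buffer.size", 64 * 1024);
    }

    @Override
    public boolean nextKeyValue() { return false; }

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 1.0f; }

    @Override
    public void close() { }
}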
From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java
License: Apache License

@Test
public void testGetOutputCommitter() {
    try {
        TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(context.getTaskAttemptID())
                .thenReturn(new TaskAttemptID("200707121733", 1, false, 1, 1));
        Configuration conf = new Configuration();
        Mockito.when(context.getConfiguration()).thenReturn(conf);
        context.getConfiguration().set("mapred.output.dir", "/out");
        Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Unable to get output committer");
    }
}
From source file: com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License: Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(
            cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader =
                uniformSizeInputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus[] fileStatus = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file: com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java
License: Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration config, TaskAttemptID taskId) {
    TaskAttemptContext localContext = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(localContext.getConfiguration()).thenReturn(config);
    Mockito.when(localContext.getTaskAttemptID()).thenReturn(taskId);
    return localContext;
}
From source file: com.intel.genomicsdb.GenomicsDBInputFormat.java
License: Open Source License

public RecordReader<String, VCONTEXT> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {

    String loaderJson;
    String queryJson;

    GenomicsDBFeatureReader<VCONTEXT, SOURCE> featureReader;
    GenomicsDBRecordReader<VCONTEXT, SOURCE> recordReader;

    if (taskAttemptContext != null) {
        Configuration configuration = taskAttemptContext.getConfiguration();
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    } else {
        // If control reaches here, this method was called from GenomicsDBRDD.
        // In that case the configuration object must have been set through setConf(),
        // otherwise this leads to a NullPointerException.
        assert (configuration != null);
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    }

    featureReader = new GenomicsDBFeatureReader<VCONTEXT, SOURCE>(loaderJson, queryJson,
            (FeatureCodec<VCONTEXT, SOURCE>) new BCF2Codec());
    recordReader = new GenomicsDBRecordReader<VCONTEXT, SOURCE>(featureReader);
    return recordReader;
}
From source file: com.jbw.tar.sf.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    // Wrap the raw output stream in a writer that emits tar-formatted records.
    return new TarOutputWriter<>(fileOut);
}
From source file: com.jbw.taroutputformat.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(tac, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    return new TarOutputWriter<>(fileOut);
}
From source file: com.jumptap.h2redis.RedisOutputFormat.java
License: Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RedisHMRecordWriter(context.getConfiguration());
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Performs all the necessary actions to initialize and prepare this record reader.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.ctx = context;
    conf = context.getConfiguration();
    keysRead = 0;
    components = Sets.newHashSetWithExpectedSize(3);
    FileSplit split = (FileSplit) inputSplit;
    validateConfiguration(conf);

    // Get comparator. Subcomparator can be null.
    AbstractType<?> comparator = getConfComparator(conf);
    AbstractType<?> subcomparator = getConfSubComparator(conf);

    // Get partitioner for keys
    IPartitioner partitioner = getConfPartitioner(conf);

    // Move minimum required db tables to local disk.
    Path dataTablePath = split.getPath();
    FileSystem remoteFS = FileSystem.get(dataTablePath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    CFMetaData cfMetaData;
    if (getConfIsSparse(conf)) {
        cfMetaData = CFMetaData.sparseCFMetaData(getDescriptor().ksname, getDescriptor().cfname,
                comparator);
    } else {
        cfMetaData = CFMetaData.denseCFMetaData(getDescriptor().ksname, getDescriptor().cfname,
                comparator, subcomparator);
    }

    // Open table and get scanner
    SSTableReader tableReader = openSSTableReader(partitioner, cfMetaData);
    setTableScanner(tableReader);
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Moves all the minimum required tables for the table reader to work to local disk.
 *
 * @param dataTablePath The data table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath,
        TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();

    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);

    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }
    components.add(Component.DATA);

    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }

    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Decompresses input files that were snappy compressed before opening them with the sstable
 * reader. It writes a new decompressed file with the same name as the compressed one. The old
 * one gets deleted.
 */
private void decompress(Path localTablePath, TaskAttemptContext context) throws IOException {
    context.setStatus(String.format("Decompressing %s", localTablePath.toUri()));
    int compressionBufSize = context.getConfiguration().getInt(PropertyConstants.DECOMPRESS_BUFFER.txt,
            DEFAULT_DECOMPRESS_BUFFER_SIZE);
    compressionBufSize *= 1024;
    LOG.info("Decompressing {} with buffer size {}.", localTablePath, compressionBufSize);
    File compressedFile = new File(localTablePath.toString());
    InputStream fis = new FileInputStream(compressedFile);
    InputStream bis = new BufferedInputStream(fis, compressionBufSize);
    InputStream sip = new SnappyInputStream(bis);
    File decompressedFile = new File(localTablePath.toString() + ".tmp");
    OutputStream os = new FileOutputStream(decompressedFile);
    OutputStream bos = new BufferedOutputStream(os, compressionBufSize);

    byte[] inByteArr = new byte[compressionBufSize];
    int bytesRead = 0;
    int bytesSinceLastReport = 0;
    while ((bytesRead = sip.read(inByteArr)) > 0) {
        bos.write(inByteArr, 0, bytesRead);
        bytesSinceLastReport += bytesRead;
        // Avoid timeouts. Report progress to the jobtracker once per reporting interval.
        if (bytesSinceLastReport >= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS) {
            context.setStatus(String.format("Decompressed %d bytes.", bytesSinceLastReport));
            bytesSinceLastReport -= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS;
        }
    }
    sip.close();
    bos.close();
    compressedFile.delete();
    decompressedFile.renameTo(compressedFile);
}