Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
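
As the examples below show, getConfiguration() is typically called inside initialize(...) of a custom RecordReader, or inside getRecordWriter(...) of an OutputFormat, to read job-level settings. A minimal sketch, assuming an illustrative reader class and a made-up property name (neither is taken from the projects listed below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative reader; only the configuration access is shown here.
public abstract class MyRecordReader extends RecordReader<LongWritable, Text> {

    private int maxRecords;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task attempt context exposes the job-wide Configuration.
        Configuration conf = context.getConfiguration();
        // "my.input.limit" is a hypothetical property used only for illustration.
        maxRecords = conf.getInt("my.input.limit", Integer.MAX_VALUE);
    }
}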

Usage

From source file:com.hp.hpit.cs.MyTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
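
The separator above is read from the job configuration under "mapred.textoutputformat.separator", so the value has to be set on the driver side before the job is submitted. A minimal sketch of that driver-side step, assuming a placeholder output path and job name (newer Hadoop versions use "mapreduce.output.textoutputformat.separator" for the built-in TextOutputFormat, but this class reads the older key shown above):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.hp.hpit.cs.MyTextOutputFormat;

public class MyTextOutputFormatDriver {

    public static Job buildJob() throws IOException {
        Configuration conf = new Configuration();
        // Must match the key read in getRecordWriter() above.
        conf.set("mapred.textoutputformat.separator", ",");
        Job job = Job.getInstance(conf, "csv-output-example"); // name is illustrative
        job.setOutputFormatClass(MyTextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/out")); // placeholder path
        return job;
    }
}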

From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java

License:Open Source License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    _config = context.getConfiguration();
    _fileSplit = (CombineFileSplit) inputSplit;
    _numFiles = _fileSplit.getNumPaths();

    _maxRecords = _config.getInt(BatchEnrichmentJob.BE_DEBUG_MAX_SIZE, Integer.MAX_VALUE);

    this.start = new Date();
    final String contextSignature = context.getConfiguration().get(BatchEnrichmentJob.BE_CONTEXT_SIGNATURE);
    try {
        this._enrichmentContext = ContextUtils.getEnrichmentContext(contextSignature);
        this._dataBucket = _enrichmentContext.getBucket().get();
    } catch (Exception e) {
        throw new IOException(e);
    }

    final String jobName = _config.get("mapred.job.name", "unknown");
    logger.info(
            jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength());

}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever)
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working)

    // Test version
    {
        final Configuration config = new Configuration(false);
        config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10");
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0);
        }
        assertEquals("Should have stopped after 10 iterations", 10, ii);
    }
    // Normal version
    {
        final Configuration config = new Configuration(false);
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0);
        }
        assertEquals("Should keep going for all 100 iterations", 100, ii);
    }
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat.java

License:Apache License

@Override
public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1)
        throws IOException, InterruptedException {

    if (null == _delegate) {
        _delegate = new EsInputFormat();
    }
    int limitOverride = MongoConfigUtil.getLimit(arg1.getConfiguration());
    if (limitOverride > 0) {
        InfiniteEsRecordReader.MAX_RECORDS = limitOverride;
    }
    return new InfiniteEsRecordReader(arg1, _delegate.createRecordReader(arg0, arg1));
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteFileInputReader.java

License:Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    _config = context.getConfiguration();
    _fileSplit = (CombineFileSplit) inputSplit;
    _numFiles = _fileSplit.getNumPaths();

    String jobName = _config.get("mapred.job.name", "unknown");
    _logger.info(
            jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength());

    String sourceStr = _config.get("mongo.input.query");
    SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null);
    _fileConfig = source.getFileConfig();

    String fields = _config.get("mongo.input.fields", "");
    if (fields.length() > 2) {
        try {
            _fieldsToDelete = (BasicDBObject) com.mongodb.util.JSON.parse(fields);
        } catch (Exception e) {
            throw new IOException("Invalid fields specification: " + fields);
        }
    }

    _debugLimit = _config.getInt("mongo.input.limit", Integer.MAX_VALUE);
    if (_debugLimit <= 0) { // (just not set)
        _debugLimit = Integer.MAX_VALUE;
    }
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat.java

License:Open Source License

@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    if (InfiniteMongoConfigUtil.getUpdateModeIncremental(context.getConfiguration())) {
        return new InfiniteMongoRecordWriter<K, V>(
                MongoConfigUtil.getOutputCollection(context.getConfiguration()), context, "key");
    } else {
        return new InfiniteMongoRecordWriter<K, V>(
                MongoConfigUtil.getOutputCollection(context.getConfiguration()), context);
    }
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) {
    super.initialize(split, context);

    String jobName = context.getConfiguration().get("mapred.job.name", "unknown");
    log.info(jobName + ": new split");
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteShareInputReader.java

License:Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    _config = context.getConfiguration();
    _fileSplit = (InfiniteShareInputSplit) inputSplit;
    _numFiles = 1;

    String jobName = _config.get("mapred.job.name", "unknown");
    _logger.info(
            jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength());

    String sourceStr = _config.get("mongo.input.query");
    SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null);
    _fileConfig = source.getFileConfig();

    String fields = _config.get("mongo.input.fields", "");
    if (fields.length() > 2) {
        try {
            _fieldsToDelete = (BasicDBObject) com.mongodb.util.JSON.parse(fields);
        } catch (Exception e) {
            throw new IOException("Invalid fields specification: " + fields);
        }
    }

    _debugLimit = _config.getInt("mongo.input.limit", Integer.MAX_VALUE);
    if (_debugLimit <= 0) { // (just not set)
        _debugLimit = Integer.MAX_VALUE;
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
    TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(context.getConfiguration()).thenReturn(conf);
    TaskAttemptID taskId = new TaskAttemptID("200707121733", 1, false, 1, 1);
    Mockito.when(context.getTaskAttemptID()).thenReturn(taskId);
    return context;
}
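
Mockito keeps the helper above lightweight, but when a real context object is acceptable, Hadoop 2.x and later provide org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl, whose getConfiguration() simply returns the Configuration handed to its constructor. A minimal sketch, assuming that class is available on the test classpath (package and availability vary with the Hadoop version):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Real (non-mock) context: getConfiguration() returns the conf passed in.
private TaskAttemptContext getRealTaskAttemptContext(Configuration conf) {
    TaskAttemptID taskId = new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1);
    return new TaskAttemptContextImpl(conf, taskId);
}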