List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
/**
 * Returns the {@link Configuration} exposed by this context.
 *
 * @return the configuration object for this context
 */
public Configuration getConfiguration();
From source file:com.hp.hpit.cs.MyTextOutputFormat.java
License:Apache License
/**
 * Builds the line-oriented record writer for the given task attempt.
 *
 * <p>The key/value separator is read from {@code mapred.textoutputformat.separator}
 * (tab when unset). When output compression is enabled, the codec class is resolved
 * with {@link GzipCodec} as the fallback, its default extension is appended to the
 * work file name, and the file stream is wrapped in the codec's output stream.
 *
 * @param job the task attempt whose configuration and work path are used
 * @return a writer that emits records to the task's default work file
 * @throws IOException if the work file cannot be resolved or created
 * @throws InterruptedException declared by the overridden contract
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    final boolean compress = getCompressOutput(job);
    final String separator = conf.get("mapred.textoutputformat.separator", "\t");

    CompressionCodec codec = null;
    String suffix = "";
    if (compress) {
        final Class<? extends CompressionCodec> codecType = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecType, conf);
        suffix = codec.getDefaultExtension();
    }

    final Path target = getDefaultWorkFile(job, suffix);
    final FileSystem fileSystem = target.getFileSystem(conf);
    // overwrite=false: creation fails rather than replacing an existing file.
    final FSDataOutputStream rawOut = fileSystem.create(target, false);

    if (compress) {
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(rawOut)), separator);
    }
    return new LineRecordWriter<K, V>(rawOut, separator);
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java
License:Open Source License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { _config = context.getConfiguration(); _fileSplit = (CombineFileSplit) inputSplit; _numFiles = _fileSplit.getNumPaths(); _maxRecords = _config.getInt(BatchEnrichmentJob.BE_DEBUG_MAX_SIZE, Integer.MAX_VALUE); this.start = new Date(); final String contextSignature = context.getConfiguration().get(BatchEnrichmentJob.BE_CONTEXT_SIGNATURE); try {// w w w .j a va 2 s.co m this._enrichmentContext = ContextUtils.getEnrichmentContext(contextSignature); this._dataBucket = _enrichmentContext.getBucket().get(); } catch (Exception e) { throw new IOException(e); } final String jobName = _config.get("mapred.job.name", "unknown"); logger.info( jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength()); }
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class); Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever) Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working) // Test version {/*from w ww . j a v a 2 s. c o m*/ final Configuration config = new Configuration(false); config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10"); final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class); Mockito.when(mock_task.getConfiguration()).thenReturn(config); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); try { reader_under_test.initialize(null, mock_task); } catch (Exception e) { } // (the _delegate init call will fail out, that's fine) int ii = 0; for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) { assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0); } assertEquals("Should have stopped after 10 iterations", 10, ii); } // Normal version { final Configuration config = new Configuration(false); final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class); Mockito.when(mock_task.getConfiguration()).thenReturn(config); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); try { reader_under_test.initialize(null, mock_task); } catch (Exception e) { } // (the _delegate init call will fail out, that's fine) int ii = 0; for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) { assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0); } assertEquals("Should keep going for all 100 iterations", 100, ii); } }
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat.java
License:Apache License
/**
 * Creates a record reader that wraps the one produced by the delegate
 * {@code EsInputFormat}, creating the delegate on first use.
 *
 * <p>If the configuration carries a positive limit (as reported by
 * {@code MongoConfigUtil.getLimit}), it is written to
 * {@code InfiniteEsRecordReader.MAX_RECORDS} before the reader is built.
 *
 * @param arg0 the split to read
 * @param arg1 the task attempt supplying the configuration
 * @return an {@code InfiniteEsRecordReader} around the delegate's reader
 * @throws IOException propagated from the delegate
 * @throws InterruptedException propagated from the delegate
 */
@Override
public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1)
        throws IOException, InterruptedException {
    if (_delegate == null) {
        _delegate = new EsInputFormat();
    }

    final int configuredLimit = MongoConfigUtil.getLimit(arg1.getConfiguration());
    if (0 < configuredLimit) {
        // NOTE(review): MAX_RECORDS is assigned through the class name, so this looks like
        // shared state that affects every reader, not just the one returned here.
        InfiniteEsRecordReader.MAX_RECORDS = configuredLimit;
    }

    return new InfiniteEsRecordReader(arg1, _delegate.createRecordReader(arg0, arg1));
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteFileInputReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { _config = context.getConfiguration(); _fileSplit = (CombineFileSplit) inputSplit; _numFiles = _fileSplit.getNumPaths(); String jobName = _config.get("mapred.job.name", "unknown"); _logger.info(// w ww . j a v a2 s . co m jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength()); String sourceStr = _config.get("mongo.input.query"); SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null); _fileConfig = source.getFileConfig(); String fields = _config.get("mongo.input.fields", ""); if (fields.length() > 2) { try { _fieldsToDelete = (BasicDBObject) com.mongodb.util.JSON.parse(fields); } catch (Exception e) { throw new IOException("Invalid fields specification: " + fields); } } _debugLimit = _config.getInt("mongo.input.limit", Integer.MAX_VALUE); if (_debugLimit <= 0) { // (just not set) _debugLimit = Integer.MAX_VALUE; } }
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat.java
License:Open Source License
/**
 * Creates the Mongo record writer for the configured output collection.
 *
 * <p>When the job is configured for incremental update mode, the writer is built with
 * the extra {@code "key"} argument; otherwise the two-argument form is used.
 *
 * @param context the task attempt supplying the configuration
 * @return a writer targeting the configured output collection
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    final boolean incremental = InfiniteMongoConfigUtil.getUpdateModeIncremental(context.getConfiguration());
    if (!incremental) {
        return new InfiniteMongoRecordWriter<K, V>(
                MongoConfigUtil.getOutputCollection(context.getConfiguration()), context);
    }
    return new InfiniteMongoRecordWriter<K, V>(
            MongoConfigUtil.getOutputCollection(context.getConfiguration()), context, "key");
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoRecordReader.java
License:Apache License
/**
 * Delegates initialization to the superclass, then logs that a new split has started,
 * prefixed with the job name ({@code mapred.job.name}, or "unknown" when unset).
 *
 * @param split the split to read
 * @param context the task attempt supplying the configuration
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) {
    super.initialize(split, context);
    log.info(context.getConfiguration().get("mapred.job.name", "unknown") + ": new split");
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteShareInputReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { _config = context.getConfiguration(); _fileSplit = (InfiniteShareInputSplit) inputSplit; _numFiles = 1;//ww w. j a v a2 s .c o m String jobName = _config.get("mapred.job.name", "unknown"); _logger.info( jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength()); String sourceStr = _config.get("mongo.input.query"); SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null); _fileConfig = source.getFileConfig(); String fields = _config.get("mongo.input.fields", ""); if (fields.length() > 2) { try { _fieldsToDelete = (BasicDBObject) com.mongodb.util.JSON.parse(fields); } catch (Exception e) { throw new IOException("Invalid fields specification: " + fields); } } _debugLimit = _config.getInt("mongo.input.limit", Integer.MAX_VALUE); if (_debugLimit <= 0) { // (just not set) _debugLimit = Integer.MAX_VALUE; } }
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
/**
 * Builds a copy listing, asks {@code DynamicInputFormat} for its splits, and reads every
 * split through a record reader.
 *
 * <p>For each record it checks that the path is one of the expected files and that the
 * reported progress is non-decreasing and within [0, 1]; after each split progress must
 * be exactly 1. Finally, the total number of records must equal the number of expected
 * file paths.
 */
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);

    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        // Each split is read under its own task attempt id.
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);

        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        // Initialize with the split this reader was created for (previously always splits.get(0)).
        recordReader.initialize(split, taskAttemptContext);

        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));

            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
/**
 * Builds a mocked {@link TaskAttemptContext} that returns the given configuration and a
 * fixed task attempt id.
 *
 * @param conf the configuration the mock should return
 * @return the stubbed context
 */
private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
    final TaskAttemptID fixedAttemptId = new TaskAttemptID("200707121733", 1, false, 1, 1);
    final TaskAttemptContext mockedContext = Mockito.mock(TaskAttemptContext.class);

    Mockito.when(mockedContext.getConfiguration()).thenReturn(conf);
    Mockito.when(mockedContext.getTaskAttemptID()).thenReturn(fixedAttemptId);

    return mockedContext;
}