List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
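All of the examples below call getConfiguration() on a task-side context to read back settings that the job driver placed into the Configuration at submit time. As a minimal, self-contained sketch of that round trip (the property name example.field.delimiter and the DelimiterMapper class are hypothetical illustrations, not taken from the projects listed):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Reads a driver-supplied setting from the task-side Configuration in setup().
public class DelimiterMapper extends Mapper<LongWritable, Text, Text, Text> {

    private String delimiter;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends TaskAttemptContext, so getConfiguration()
        // returns the Configuration the driver populated at submit time.
        Configuration conf = context.getConfiguration();
        delimiter = conf.get("example.field.delimiter", "\t"); // hypothetical key
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(delimiter, 2);
        if (fields.length == 2) {
            context.write(new Text(fields[0]), new Text(fields[1]));
        }
    }
}

Because Mapper.Context, Reducer.Context, and the record reader/writer contexts all extend TaskAttemptContext, the same call works anywhere on the task side.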
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License: Apache License
DynamicPartitionerWriterWrapper(TaskAttemptContext job) {
    this.job = job;
    this.outputName = DynamicPartitioningOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
    this.dynamicPartitioner = new InstantiatorFactory(false).get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    this.taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    this.partitioning = outputDataset.getPartitioning();

    this.dynamicPartitioner.initialize(taskContext);
}
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License: Apache License
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than
    // FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = DynamicPartitioningOutputFormat
        .createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License: Apache License
private FileOutputFormat<K, V> getFileOutputFormat(TaskAttemptContext job) {
    if (fileOutputFormat == null) {
        Class<? extends FileOutputFormat> delegateOutputFormat = job.getConfiguration().getClass(
            Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, null, FileOutputFormat.class);
        @SuppressWarnings("unchecked")
        FileOutputFormat<K, V> fileOutputFormat = ReflectionUtils.newInstance(delegateOutputFormat,
            job.getConfiguration());
        this.fileOutputFormat = fileOutputFormat;
    }
    return fileOutputFormat;
}
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License: Apache License
/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
        .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        @Override
        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}
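The composite writer above follows a common pattern: derive a partition key from each record, lazily open one delegate RecordWriter per key, and route writes to the cached delegates. A stripped-down sketch of just that pattern, with the CDAP-specific pieces replaced by hypothetical partitionKeyOf(...) and openWriterFor(...) hooks:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// One delegate writer per derived partition key; writers are opened on first use.
public abstract class PartitionRoutingWriter<K, V> extends RecordWriter<K, V> {

    private final Map<String, RecordWriter<K, V>> writers = new HashMap<>();

    // Hypothetical hooks: how to derive a key, and how to open a writer for it.
    protected abstract String partitionKeyOf(K key, V value);
    protected abstract RecordWriter<K, V> openWriterFor(String partitionKey) throws IOException;

    @Override
    public void write(K key, V value) throws IOException, InterruptedException {
        String partitionKey = partitionKeyOf(key, value);
        RecordWriter<K, V> delegate = writers.get(partitionKey);
        if (delegate == null) {
            delegate = openWriterFor(partitionKey);
            writers.put(partitionKey, delegate);
        }
        delegate.write(key, value);
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        // Close every cached delegate, even if one of them fails.
        IOException failure = null;
        for (RecordWriter<K, V> delegate : writers.values()) {
            try {
                delegate.close(context);
            } catch (IOException e) {
                failure = e;
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}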
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License: Apache License
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than
    // FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
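Both copies of getTaskAttemptContext rely on the same trick: Configuration is mutable, so the code copies it (new Job(conf) clones the Configuration), adjusts the output settings on the copy, and wraps the result in a fresh TaskAttemptContextImpl carrying the original attempt ID. A minimal sketch of just that copy-and-override step, with a hypothetical single-property tweak (withOverride is not part of any of the projects listed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public final class TaskAttemptContexts {

    private TaskAttemptContexts() { }

    // Returns a new context whose Configuration differs from the original
    // by a single overridden property; the original context is untouched.
    public static TaskAttemptContext withOverride(TaskAttemptContext context,
            String name, String value) {
        // Copy first: the copy constructor clones the underlying properties,
        // so the mutation below does not leak into the running task's config.
        Configuration copy = new Configuration(context.getConfiguration());
        copy.set(name, value);
        return new TaskAttemptContextImpl(copy, context.getTaskAttemptID());
    }
}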
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License: Apache License
private FileOutputFormat<K, V> getFileOutputFormat(TaskAttemptContext job) {
    if (fileOutputFormat == null) {
        Class<? extends FileOutputFormat> delegateOutputFormat = job.getConfiguration().getClass(
            Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, null, FileOutputFormat.class);
        @SuppressWarnings("unchecked")
        FileOutputFormat<K, V> fileOutputFormat = new InstantiatorFactory(false)
            .get(TypeToken.of(delegateOutputFormat)).create();
        this.fileOutputFormat = fileOutputFormat;
    }
    return fileOutputFormat;
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceBatchReadableInputFormat.java
License: Apache License
@Override
protected BatchReadable<KEY, VALUE> createBatchReadable(TaskAttemptContext context, String datasetName,
        Map<String, String> datasetArgs) {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    BasicMapReduceTaskContext<?, ?> taskContext = classLoader.getTaskContextProvider().get(context);
    return taskContext.getBatchReadable(datasetName, datasetArgs);
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceBatchWritableOutputFormat.java
License: Apache License
@Override
protected CloseableBatchWritable<KEY, VALUE> createBatchWritable(TaskAttemptContext context,
        String datasetName, Map<String, String> datasetArgs) {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    BasicMapReduceTaskContext<?, ?> taskContext = classLoader.getTaskContextProvider().get(context);
    return taskContext.getBatchWritable(datasetName, datasetArgs);
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceContextProvider.java
License: Apache License
public MapReduceContextProvider(TaskAttemptContext context, MapReduceMetrics.TaskType type) {
    this.taskContext = context;
    this.type = type;
    this.contextConfig = new MapReduceContextConfig(context.getConfiguration());
    this.locationFactory = new LocalLocationFactory();
    this.contextBuilder = null;
}
From source file: co.cask.cdap.internal.app.runtime.batch.stream.MapReduceStreamInputFormat.java
License: Apache License
private BasicMapReduceTaskContext getMapReduceTaskContext(TaskAttemptContext context) {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    return classLoader.getTaskContextProvider().get(context);
}