Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
/**
 * Accessor whose usages are listed below; it returns a {@link Configuration}.
 *
 * <p>The callers in this listing read string settings from the returned object and hand it to
 * helpers such as {@code FileSystem.get} and {@code ReflectionUtils.newInstance}.
 *
 * @return the configuration object
 */
public Configuration getConfiguration();
From source file:co.cask.cdap.data.stream.StreamInputFormat.java
License:Apache License
/**
 * Creates the record reader for a stream split.
 *
 * <p>The task's configuration is handed to {@code createStreamEventDecoder}, and the decoder it
 * returns is wrapped in a new {@code StreamRecordReader}. The {@code split} argument is not
 * consulted here.
 *
 * @param split the input split (unused by this method)
 * @param context task attempt context supplying the configuration
 * @return a new {@code StreamRecordReader} built around the decoder
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new StreamRecordReader<>(
      createStreamEventDecoder(context.getConfiguration()));
}
From source file:co.cask.cdap.data.stream.StreamRecordReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { inputSplit = (StreamInputSplit) split; reader = createReader(FileSystem.get(context.getConfiguration()), inputSplit); reader.initialize();//w w w. j ava2 s .c o m readFilter = new TimeRangeReadFilter(inputSplit.getStartTime(), inputSplit.getEndTime()); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java
License:Apache License
/**
 * Creates a record reader backed by a dataset split reader.
 *
 * <p>The dataset name and the JSON-encoded dataset arguments are read from the task configuration
 * under {@code DATASET_NAME} and {@code DATASET_ARGS}. They are passed to
 * {@code createBatchReadable}, and the resulting readable produces the split reader that is
 * wrapped and returned.
 *
 * @param split the split to read; must be a {@code DataSetInputSplit}
 * @param context task attempt context supplying the configuration
 * @return a {@code SplitReaderRecordReader} wrapping the dataset's split reader
 */
@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  DataSetInputSplit dataSetSplit = (DataSetInputSplit) split;
  Configuration hConf = context.getConfiguration();

  String name = hConf.get(DATASET_NAME);
  String argsJson = hConf.get(DATASET_ARGS);
  Map<String, String> args = GSON.fromJson(argsJson, DATASET_ARGS_TYPE);

  @SuppressWarnings("unchecked")
  BatchReadable<KEY, VALUE> readable = createBatchReadable(context, name, args);
  SplitReader<KEY, VALUE> reader = readable.createSplitReader(dataSetSplit.getSplit());
  return new SplitReaderRecordReader<>(reader);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
/**
 * Creates a record writer backed by a batch-writable dataset.
 *
 * <p>The dataset name and the JSON-encoded dataset arguments are read from the task configuration
 * under {@code DATASET_NAME} and {@code DATASET_ARGS}, then passed to
 * {@code createBatchWritable}; the result is wrapped in a {@code BatchWritableRecordWriter}.
 *
 * @param context task attempt context supplying the configuration
 * @return a {@code BatchWritableRecordWriter} around the created writable
 */
@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration hConf = context.getConfiguration();

  String name = hConf.get(DATASET_NAME);
  String argsJson = hConf.get(DATASET_ARGS);
  Map<String, String> args = GSON.fromJson(argsJson, DATASET_ARGS_TYPE);

  return new BatchWritableRecordWriter<>(createBatchWritable(context, name, args));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w w w . ja v a2s . c om*/ public RecordReader<KEY, VALUE> createRecordReader(final InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException { DataSetInputSplit inputSplit = (DataSetInputSplit) split; Configuration conf = context.getConfiguration(); // we don't currently allow datasets as the format between map and reduce stages, otherwise we'll have to // pass in the stage here instead of hardcoding mapper. MapReduceContextProvider contextProvider = new MapReduceContextProvider(context, MapReduceMetrics.TaskType.Mapper); BasicMapReduceContext mrContext = contextProvider.get(); mrContext.getMetricsCollectionService().startAndWait(); String dataSetName = getInputName(conf); BatchReadable<KEY, VALUE> inputDataset = mrContext.getDataset(dataSetName); SplitReader<KEY, VALUE> splitReader = inputDataset.createSplitReader(inputSplit.getSplit()); // the record reader now owns the context and will close it return new DataSetRecordReader<>(splitReader, contextProvider); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetOutputFormat.java
License:Apache License
@Override public RecordWriter<KEY, VALUE> getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); // we don't currently allow datasets as the format between map and reduce stages, otherwise we'll have to // pass in the stage here instead of hardcoding reducer. MapReduceContextProvider contextProvider = new MapReduceContextProvider(context, MapReduceMetrics.TaskType.Reducer); BasicMapReduceContext mrContext = contextProvider.get(); mrContext.getMetricsCollectionService().startAndWait(); @SuppressWarnings("unchecked") BatchWritable<KEY, VALUE> dataset = mrContext.getDataset(getOutputDataSet(conf)); // the record writer now owns the context and will close it return new DataSetRecordWriter<>(dataset, contextProvider); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java
License:Apache License
/**
 * Creates a delegating record reader for a tagged split.
 *
 * <p>The split's input configs and the task configuration are passed to
 * {@code ConfigurationUtil.setAll} (presumably copying the former into the latter - confirm
 * against that helper) before the {@code DelegatingRecordReader} is constructed.
 *
 * @param split the split to read; must be a {@code TaggedInputSplit}
 * @param context task attempt context supplying the configuration
 * @return a new {@code DelegatingRecordReader} for the tagged split
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  TaggedInputSplit tagged = (TaggedInputSplit) split;
  ConfigurationUtil.setAll(tagged.getInputConfigs(), context.getConfiguration());
  return new DelegatingRecordReader<>(tagged, context);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java
License:Apache License
/** * Constructs the DelegatingRecordReader. * * @param taggedInputSplit TaggedInputSplit object * @param context TaskAttemptContext object * * @throws IOException/*from w w w . j a v a2s. c om*/ * @throws InterruptedException */ @SuppressWarnings("unchecked") public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the TaggedInputSplit. InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); InputSplit inputSplit = taggedInputSplit.getInputSplit(); originalRR = inputFormat.createRecordReader(inputSplit, context); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java
License:Apache License
@Override public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split; ConfigurationUtil.setAll((taggedInputSplit).getInputConfigs(), context.getConfiguration()); InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); InputSplit inputSplit = taggedInputSplit.getInputSplit(); // we can't simply compute the underlying RecordReader and return it, because we need to override its // initialize method in order to initialize the underlying RecordReader with the underlying InputSplit // Find the InputFormat and then the RecordReader from the MultiInputTaggedSplit. return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context)); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
@SuppressWarnings("unchecked") private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException { // look for record-writer in the cache RecordWriter writer = recordWriters.get(namedOutput); // If not in cache, create a new one if (writer == null) { // get the record writer from context output format TaskAttemptContext taskContext = getContext(namedOutput); Class<? extends OutputFormat<?, ?>> outputFormatClass; try {/* w w w . j a va 2 s .c o m*/ outputFormatClass = taskContext.getOutputFormatClass(); } catch (ClassNotFoundException e) { throw new IOException(e); } ClassLoader outputFormatClassLoader = outputFormatClass.getClassLoader(); // This is needed in case the OutputFormat's classloader conflicts with the program classloader (for example, // TableOutputFormat). ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(outputFormatClassLoader); try { // We use ReflectionUtils to instantiate the OutputFormat, because it also calls setConf on the object, if it // is a org.apache.hadoop.conf.Configurable. OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(outputFormatClass, taskContext.getConfiguration()); writer = new MeteredRecordWriter<>(outputFormat.getRecordWriter(taskContext), context); } finally { ClassLoaders.setContextClassLoader(oldClassLoader); } // add the record-writer to the cache recordWriters.put(namedOutput, writer); } return writer; }