Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of the getConfiguration() method of org.apache.hadoop.mapreduce.TaskAttemptContext.

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:co.cask.cdap.data.stream.StreamInputFormat.java

License:Apache License

/**
 * Creates the record reader for a stream input split.
 *
 * <p>The returned {@code StreamRecordReader} wraps the decoder produced by
 * {@code createStreamEventDecoder} from the task attempt's configuration. The {@code split}
 * argument is not used by this method.
 *
 * @param split the input split (unused here)
 * @param context the task attempt context whose configuration is passed to the decoder factory
 * @return a new {@code StreamRecordReader} around the created decoder
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new StreamRecordReader<>(createStreamEventDecoder(context.getConfiguration()));
}

From source file:co.cask.cdap.data.stream.StreamRecordReader.java

License:Apache License

/**
 * Prepares this record reader for the given split.
 *
 * <p>Stores the split, opens and initializes a reader over it using the file system obtained
 * from the task configuration, and installs a time-range filter built from the split's start
 * and end times.
 *
 * @param split the split to read; must be a {@code StreamInputSplit}
 * @param context the task attempt context supplying the configuration
 * @throws IOException if obtaining the file system or setting up the reader fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.inputSplit = (StreamInputSplit) split;

    FileSystem fileSystem = FileSystem.get(context.getConfiguration());
    this.reader = createReader(fileSystem, this.inputSplit);
    this.reader.initialize();

    this.readFilter = new TimeRangeReadFilter(
            this.inputSplit.getStartTime(), this.inputSplit.getEndTime());
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java

License:Apache License

/**
 * Creates a record reader backed by a dataset's split reader.
 *
 * <p>The dataset name and its arguments (stored as JSON) are read from the task configuration
 * under {@code DATASET_NAME} and {@code DATASET_ARGS}; the resulting {@code BatchReadable}
 * supplies the split reader for the wrapped split.
 *
 * @param split the input split; must be a {@code DataSetInputSplit}
 * @param context the task attempt context supplying the configuration
 * @return a {@code SplitReaderRecordReader} around the dataset's split reader
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    DataSetInputSplit dataSetSplit = (DataSetInputSplit) split;
    Configuration jobConf = context.getConfiguration();

    // Dataset identity and arguments travel through the job configuration.
    String name = jobConf.get(DATASET_NAME);
    String argsJson = jobConf.get(DATASET_ARGS);
    Map<String, String> args = GSON.fromJson(argsJson, DATASET_ARGS_TYPE);

    @SuppressWarnings("unchecked")
    BatchReadable<KEY, VALUE> readable = createBatchReadable(context, name, args);
    SplitReader<KEY, VALUE> reader = readable.createSplitReader(dataSetSplit.getSplit());
    return new SplitReaderRecordReader<>(reader);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java

License:Apache License

/**
 * Creates a record writer that writes to a dataset.
 *
 * <p>The dataset name and its arguments (stored as JSON) are read from the task configuration
 * under {@code DATASET_NAME} and {@code DATASET_ARGS}, then handed to
 * {@code createBatchWritable}.
 *
 * @param context the task attempt context supplying the configuration
 * @return a {@code BatchWritableRecordWriter} around the created batch writable
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration jobConf = context.getConfiguration();

    // Dataset identity and arguments travel through the job configuration.
    String name = jobConf.get(DATASET_NAME);
    String argsJson = jobConf.get(DATASET_ARGS);
    Map<String, String> args = GSON.fromJson(argsJson, DATASET_ARGS_TYPE);

    return new BatchWritableRecordWriter<>(createBatchWritable(context, name, args));
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java

License:Apache License

/**
 * Creates a record reader over the input dataset named in the task configuration.
 *
 * <p>A {@code MapReduceContextProvider} is created for the mapper task type, the metrics
 * collection service of its context is started via {@code startAndWait()}, and the input
 * dataset's split reader is wrapped in a {@code DataSetRecordReader}.
 *
 * @param split the input split; must be a {@code DataSetInputSplit}
 * @param context the task attempt context supplying the configuration
 * @return a {@code DataSetRecordReader} that receives the split reader and the context provider
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@SuppressWarnings("unchecked")
@Override
public RecordReader<KEY, VALUE> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    DataSetInputSplit dataSetSplit = (DataSetInputSplit) split;
    Configuration jobConf = context.getConfiguration();

    // Datasets are not currently allowed as the format between the map and reduce stages;
    // if that changes, the stage must be passed in here instead of hardcoding mapper.
    MapReduceContextProvider provider =
            new MapReduceContextProvider(context, MapReduceMetrics.TaskType.Mapper);
    BasicMapReduceContext mapReduceContext = provider.get();
    mapReduceContext.getMetricsCollectionService().startAndWait();

    BatchReadable<KEY, VALUE> source = mapReduceContext.getDataset(getInputName(jobConf));
    SplitReader<KEY, VALUE> reader = source.createSplitReader(dataSetSplit.getSplit());

    // Ownership of the context passes to the record reader, which will close it.
    return new DataSetRecordReader<>(reader, provider);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetOutputFormat.java

License:Apache License

/**
 * Creates a record writer for the output dataset named in the task configuration.
 *
 * <p>A {@code MapReduceContextProvider} is created for the reducer task type, the metrics
 * collection service of its context is started via {@code startAndWait()}, and the output
 * dataset is wrapped in a {@code DataSetRecordWriter}.
 *
 * @param context the task attempt context supplying the configuration
 * @return a {@code DataSetRecordWriter} that receives the dataset and the context provider
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<KEY, VALUE> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration jobConf = context.getConfiguration();

    // Datasets are not currently allowed as the format between the map and reduce stages;
    // if that changes, the stage must be passed in here instead of hardcoding reducer.
    MapReduceContextProvider provider =
            new MapReduceContextProvider(context, MapReduceMetrics.TaskType.Reducer);
    BasicMapReduceContext mapReduceContext = provider.get();
    mapReduceContext.getMetricsCollectionService().startAndWait();

    @SuppressWarnings("unchecked")
    BatchWritable<KEY, VALUE> sink = mapReduceContext.getDataset(getOutputDataSet(jobConf));

    // Ownership of the context passes to the record writer, which will close it.
    return new DataSetRecordWriter<>(sink, provider);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java

License:Apache License

/**
 * Creates a delegating record reader for a tagged split.
 *
 * <p>Before the reader is built, the input configs carried by the split are applied to the
 * task configuration through {@code ConfigurationUtil.setAll}.
 *
 * @param split the input split; must be a {@code TaggedInputSplit}
 * @param context the task attempt context whose configuration receives the split's configs
 * @return a {@code DelegatingRecordReader} for the tagged split
 * @throws IOException if constructing the delegating reader fails
 * @throws InterruptedException if constructing the delegating reader is interrupted
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    TaggedInputSplit tagged = (TaggedInputSplit) split;

    // Apply the per-input settings carried by the split before any reader is created.
    ConfigurationUtil.setAll(tagged.getInputConfigs(), context.getConfiguration());

    return new DelegatingRecordReader<>(tagged, context);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java

License:Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * <p>Instantiates the input format class named by the tagged split (via
 * {@code ReflectionUtils.newInstance} with the task configuration) and asks it for a record
 * reader over the split wrapped inside the tagged split.
 *
 * @param taggedInputSplit TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException if the underlying input format fails to create its record reader
 * @throws InterruptedException if creating the underlying record reader is interrupted
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Resolve the InputFormat recorded in the TaggedInputSplit first ...
    InputFormat<K, V> delegateFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(
            taggedInputSplit.getInputFormatClass(), context.getConfiguration());

    // ... then let it build the RecordReader for the wrapped split.
    this.originalRR = delegateFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License:Apache License

/**
 * Creates a record reader that delegates to the reader of the split's own input format.
 *
 * <p>The input configs carried by the split are first applied to the task configuration; the
 * split's input format is then instantiated and asked for a reader over the wrapped split.
 *
 * @param split the input split; must be a {@code MultiInputTaggedSplit}
 * @param context the task attempt context supplying (and receiving) configuration
 * @return a {@code DelegatingRecordReader} wrapping the underlying record reader
 * @throws IOException if the underlying input format fails to create its record reader
 * @throws InterruptedException if creating the underlying record reader is interrupted
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    MultiInputTaggedSplit tagged = (MultiInputTaggedSplit) split;
    ConfigurationUtil.setAll(tagged.getInputConfigs(), context.getConfiguration());

    // Find the InputFormat and then the RecordReader from the MultiInputTaggedSplit.
    InputFormat<K, V> delegateFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(tagged.getInputFormatClass(), context.getConfiguration());
    InputSplit wrappedSplit = tagged.getInputSplit();
    RecordReader<K, V> underlyingReader = delegateFormat.createRecordReader(wrappedSplit, context);

    // The underlying RecordReader cannot simply be returned as-is: its initialize method has to
    // be overridden so that it is initialized with the underlying InputSplit.
    return new DelegatingRecordReader<>(underlyingReader);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Returns the record writer for a named output, creating and caching it on first use.
 *
 * <p>On a cache miss the output format class of the named output's task context is
 * instantiated with the output format's own class loader installed as the context class
 * loader; the previous context class loader is restored afterwards.
 *
 * @param namedOutput the name of the output whose writer is requested
 * @return the cached or newly created record writer for {@code namedOutput}
 * @throws IOException if the output format class cannot be found (wrapping the
 *         {@code ClassNotFoundException}) or creating the writer fails
 * @throws InterruptedException if creating the writer is interrupted
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException {
    // Fast path: a writer for this output was already created.
    RecordWriter cachedWriter = recordWriters.get(namedOutput);
    if (cachedWriter != null) {
        return cachedWriter;
    }

    // Slow path: build the writer from the output format of the named output's context.
    TaskAttemptContext taskContext = getContext(namedOutput);

    Class<? extends OutputFormat<?, ?>> formatClass;
    try {
        formatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    // Switching the context class loader is needed in case the OutputFormat's classloader
    // conflicts with the program classloader (for example, TableOutputFormat).
    ClassLoader formatClassLoader = formatClass.getClassLoader();
    ClassLoader previousClassLoader = ClassLoaders.setContextClassLoader(formatClassLoader);

    RecordWriter createdWriter;
    try {
        // ReflectionUtils is used to instantiate the OutputFormat because it also calls setConf
        // on the object if it is a org.apache.hadoop.conf.Configurable.
        OutputFormat<?, ?> outputFormat =
                ReflectionUtils.newInstance(formatClass, taskContext.getConfiguration());
        createdWriter = new MeteredRecordWriter<>(outputFormat.getRecordWriter(taskContext), context);
    } finally {
        ClassLoaders.setContextClassLoader(previousClassLoader);
    }

    // Remember the writer so later calls for the same output reuse it.
    recordWriters.put(namedOutput, createdWriter);
    return createdWriter;
}