Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
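
Before the real-world examples below, here is a minimal, hypothetical RecordReader (not taken from any of the projects listed) showing the typical pattern: getConfiguration() is called during initialize() to read job-level settings. The key "example.record.encoding" is made up for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ExampleRecordReader extends RecordReader<LongWritable, Text> {
    private String encoding;
    private boolean consumed;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // getConfiguration() returns the job configuration; read a setting from it.
        Configuration conf = context.getConfiguration();
        encoding = conf.get("example.record.encoding", "UTF-8");
    }

    @Override
    public boolean nextKeyValue() {
        // Emit a single record, then report end of input.
        if (consumed) {
            return false;
        }
        consumed = true;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() {
        return new LongWritable(0);
    }

    @Override
    public Text getCurrentValue() {
        return new Text(encoding);
    }

    @Override
    public float getProgress() {
        return consumed ? 1.0f : 0.0f;
    }

    @Override
    public void close() {
        // nothing to release in this sketch
    }
}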

Usage

From source file: co.cask.cdap.data.stream.StreamInputFormat.java

License: Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new StreamRecordReader<>(createStreamEventDecoder(context.getConfiguration()));
}

From source file: co.cask.cdap.data.stream.StreamRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    inputSplit = (StreamInputSplit) split;
    reader = createReader(FileSystem.get(context.getConfiguration()), inputSplit);
    reader.initialize();
    readFilter = new TimeRangeReadFilter(inputSplit.getStartTime(), inputSplit.getEndTime());
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java

License: Apache License

@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    DataSetInputSplit inputSplit = (DataSetInputSplit) split;
    Configuration conf = context.getConfiguration();

    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);

    @SuppressWarnings("unchecked")
    BatchReadable<KEY, VALUE> batchReadable = createBatchReadable(context, datasetName, datasetArgs);
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());
    return new SplitReaderRecordReader<>(splitReader);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java

License: Apache License

@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);
    return new BatchWritableRecordWriter<>(createBatchWritable(context, datasetName, datasetArgs));
}
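
The two formats above expect DATASET_NAME and DATASET_ARGS to have been written into the job configuration at submission time, which is what makes them visible through context.getConfiguration() on the task side. A sketch of what that submit side could look like; the key names and values below are illustrative stand-ins, not CDAP's actual constants:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SubmitSideSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "dataset-example");
        Configuration conf = job.getConfiguration();
        // Values set here are what createRecordReader/getRecordWriter later read
        // back through context.getConfiguration() in the running task.
        conf.set("input.datasetName", "purchases");
        conf.set("input.datasetArgs", "{\"ttl\":\"3600\"}");
        // ... configure input/output formats, mapper, reducer, then submit the job ...
    }
}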

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public RecordReader<KEY, VALUE> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {

    DataSetInputSplit inputSplit = (DataSetInputSplit) split;

    Configuration conf = context.getConfiguration();
    // We don't currently allow datasets as the format between map and reduce stages; otherwise we would
    // have to pass in the stage here instead of hardcoding mapper.
    MapReduceContextProvider contextProvider = new MapReduceContextProvider(context,
            MapReduceMetrics.TaskType.Mapper);
    BasicMapReduceContext mrContext = contextProvider.get();
    mrContext.getMetricsCollectionService().startAndWait();
    String dataSetName = getInputName(conf);
    BatchReadable<KEY, VALUE> inputDataset = mrContext.getDataset(dataSetName);
    SplitReader<KEY, VALUE> splitReader = inputDataset.createSplitReader(inputSplit.getSplit());

    // the record reader now owns the context and will close it
    return new DataSetRecordReader<>(splitReader, contextProvider);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.DataSetOutputFormat.java

License: Apache License

@Override
public RecordWriter<KEY, VALUE> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    // We don't currently allow datasets as the format between map and reduce stages; otherwise we would
    // have to pass in the stage here instead of hardcoding reducer.
    MapReduceContextProvider contextProvider = new MapReduceContextProvider(context,
            MapReduceMetrics.TaskType.Reducer);
    BasicMapReduceContext mrContext = contextProvider.get();
    mrContext.getMetricsCollectionService().startAndWait();
    @SuppressWarnings("unchecked")
    BatchWritable<KEY, VALUE> dataset = mrContext.getDataset(getOutputDataSet(conf));

    // the record writer now owns the context and will close it
    return new DataSetRecordWriter<>(dataset, contextProvider);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java

License: Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    ConfigurationUtil.setAll(taggedInputSplit.getInputConfigs(), context.getConfiguration());
    return new DelegatingRecordReader<>(taggedInputSplit, context);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param taggedInputSplit TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    originalRR = inputFormat.createRecordReader(inputSplit, context);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split;
    ConfigurationUtil.setAll(taggedInputSplit.getInputConfigs(), context.getConfiguration());
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    // We can't simply create the underlying RecordReader and return it, because we need to override its
    // initialize method so that the underlying RecordReader is initialized with the underlying InputSplit
    // rather than the MultiInputTaggedSplit.
    return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context));
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License: Apache License

@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException {
    // look for record-writer in the cache
    RecordWriter writer = recordWriters.get(namedOutput);

    // If not in cache, create a new one
    if (writer == null) {
        // get the record writer from context output format
        TaskAttemptContext taskContext = getContext(namedOutput);

        Class<? extends OutputFormat<?, ?>> outputFormatClass;
        try {
            outputFormatClass = taskContext.getOutputFormatClass();
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        ClassLoader outputFormatClassLoader = outputFormatClass.getClassLoader();
        // This is needed in case the OutputFormat's classloader conflicts with the program classloader (for example,
        // TableOutputFormat).
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(outputFormatClassLoader);

        try {
            // We use ReflectionUtils to instantiate the OutputFormat, because it also calls setConf on the object, if it
            // is a org.apache.hadoop.conf.Configurable.
            OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(outputFormatClass,
                    taskContext.getConfiguration());
            writer = new MeteredRecordWriter<>(outputFormat.getRecordWriter(taskContext), context);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }

        // add the record-writer to the cache
        recordWriters.put(namedOutput, writer);
    }
    return writer;
}
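
A note on the classloader handling in the example above: ClassLoaders.setContextClassLoader is a CDAP utility, but the underlying save-and-restore idiom is plain Java. A minimal standalone sketch of the same pattern, using only the standard library:

import java.util.concurrent.Callable;

public final class ContextClassLoaderUtil {

    private ContextClassLoaderUtil() { }

    /**
     * Runs the given action with {@code cl} installed as the thread context
     * classloader, restoring the previous classloader afterwards.
     */
    public static <T> T callWith(ClassLoader cl, Callable<T> action) throws Exception {
        Thread current = Thread.currentThread();
        ClassLoader old = current.getContextClassLoader();
        current.setContextClassLoader(cl);
        try {
            return action.call();
        } finally {
            // Restore even if the action throws, mirroring the try/finally above.
            current.setContextClassLoader(old);
        }
    }
}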