Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
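
The examples below share one pattern: an InputFormat, OutputFormat, or task-level helper calls JobContext.getConfiguration() to read job-wide settings that were placed in the configuration before submission. Here is a minimal sketch of that pattern; the property name custom.input.path and the class SketchInputFormat are hypothetical, for illustration only:

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

public abstract class SketchInputFormat<K, V> extends InputFormat<K, V> {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // getConfiguration() returns the Configuration the job was submitted with
        Configuration conf = context.getConfiguration();
        String inputPath = conf.get("custom.input.path"); // hypothetical property
        if (inputPath == null) {
            throw new IOException("Missing required property: custom.input.path");
        }
        // ... derive splits from inputPath here ...
        return Collections.emptyList();
    }
}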

Usage

From source file: co.cask.cdap.etl.batch.mapreduce.TransformRunner.java

License: Apache License

public TransformRunner(MapReduceTaskContext<Object, Object> context, Metrics metrics) throws Exception {
    JobContext jobContext = context.getHadoopContext();
    Configuration hConf = jobContext.getConfiguration();

    // figure out whether we are writing to a single output or to multiple outputs
    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    this.outputWriter = getSinkWriter(context, phaseSpec.getPhase(), hConf);

    // instantiate and initialize all transformations and setup the TransformExecutor
    PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(context, phaseSpec);
    // stage name -> runtime args for that stage
    Map<String, Map<String, String>> runtimeArgs = GSON.fromJson(hConf.get(ETLMapReduce.RUNTIME_ARGS_KEY),
            ETLMapReduce.RUNTIME_ARGS_TYPE);

    PipelinePhase phase = phaseSpec.getPhase();
    Set<StageInfo> aggregators = phase.getStagesOfType(BatchAggregator.PLUGIN_TYPE);
    if (!aggregators.isEmpty()) {
        String aggregatorName = aggregators.iterator().next().getName();
        // if we're in the mapper, get the part of the pipeline starting from sources and ending at aggregator
        if (jobContext instanceof Mapper.Context) {
            phase = phase.subsetTo(ImmutableSet.of(aggregatorName));
        } else {
            // if we're in the reducer, get the part of the pipeline starting from the aggregator and ending at sinks
            phase = phase.subsetFrom(ImmutableSet.of(aggregatorName));
        }
    }
    TransformExecutorFactory<KeyValue<KEY, VALUE>> transformExecutorFactory = new MapReduceTransformExecutorFactory<>(
            context, pluginInstantiator, metrics, runtimeArgs);
    this.transformExecutor = transformExecutorFactory.create(phase);

    // setup error dataset information
    this.transformsWithoutErrorDataset = new HashSet<>();
    this.transformErrorSinkMap = new HashMap<>();
    for (StageInfo transformInfo : phaseSpec.getPhase().getStagesOfType(Transform.PLUGIN_TYPE)) {
        String errorDatasetName = transformInfo.getErrorDatasetName();
        if (errorDatasetName != null) {
            transformErrorSinkMap.put(transformInfo.getName(),
                    new ErrorOutputWriter<>(context, errorDatasetName));
        }
    }
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    // Decode splits from Configuration
    String splitsConf = context.getConfiguration().get(SPLITS);
    if (splitsConf == null) {
        throw new IOException("No input splits available from job configuration.");
    }
    ByteArrayDataInput dataInput = ByteStreams.newDataInput(Bytes.toBytesBinary(splitsConf));
    int size = dataInput.readInt();
    List<InputSplit> splits = new ArrayList<>(size);
    for (int i = 0; i < size; i++) {
        DataSetInputSplit inputSplit = new DataSetInputSplit();
        inputSplit.readFields(dataInput);
        splits.add(inputSplit);
    }
    return splits;
}
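
For context, here is a hedged sketch of the encoder implied by the decoding above: before submission, the splits would be serialized into the configuration under the same SPLITS key, living alongside the constants in this class. It assumes DataSetInputSplit implements Writable and that Bytes.toStringBinary is the inverse of the Bytes.toBytesBinary call used in getSplits():

static void setInputSplits(Configuration conf, List<DataSetInputSplit> splits) throws IOException {
    // Write the split count followed by each serialized split
    ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
    dataOutput.writeInt(splits.size());
    for (DataSetInputSplit split : splits) {
        split.write(dataOutput);
    }
    // Store the bytes under the SPLITS key read back by getSplits()
    conf.set(SPLITS, Bytes.toStringBinary(dataOutput.toByteArray()));
}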

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration hConf = context.getConfiguration();
    if (hConf.get(DATASET_NAME) == null || hConf.get(DATASET_ARGS) == null) {
        throw new IOException("Dataset configurations are missing in the job configuration");
    }
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    MapReduceContextConfig mrContextConfig = new MapReduceContextConfig(context.getConfiguration());
    List<Split> splits = mrContextConfig.getInputSelection();
    List<InputSplit> list = new ArrayList<>();
    for (Split split : splits) {
        list.add(new DataSetInputSplit(split));
    }
    return list;
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        Job jobCopy = Job.getInstance(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific to this input on the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a TaggedInputSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new TaggedInputSplit(inputName, split, confCopy,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        Job jobCopy = Job.getInstance(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific to this input on the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());

        InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);
        // some input formats need a JobID in order to compute splits
        jobCopy.setJobID(new JobID(inputName, inputName.hashCode()));

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a MultiInputTaggedSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new MultiInputTaggedSplit(split, confCopy, inputName,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License: Apache License

static List<String> getNamedOutputsList(JobContext job) {
    Iterable<String> parts = Splitter.on(" ").omitEmptyStrings()
            .split(job.getConfiguration().get(MULTIPLE_OUTPUTS, ""));
    return Lists.newArrayList(parts);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License: Apache License

static Class<? extends OutputFormat> getNamedOutputFormatClass(JobContext job, String namedOutput) {
    return job.getConfiguration().getClass(MO_PREFIX + namedOutput + FORMAT, null, OutputFormat.class);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License: Apache License

private static Class<?> getNamedOutputKeyClass(JobContext job, String namedOutput) {
    return job.getConfiguration().getClass(MO_PREFIX + namedOutput + KEY, null, Object.class);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License: Apache License

private static Class<?> getNamedOutputValueClass(JobContext job, String namedOutput) {
    return job.getConfiguration().getClass(MO_PREFIX + namedOutput + VALUE, null, Object.class);
}