Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:co.cask.cdap.etl.batch.mapreduce.TransformRunner.java

License:Apache License

/**
 * Creates the runner for a single MapReduce task.
 *
 * <p>The constructor reads the phase specification (JSON stored under {@code Constants.PIPELINEID}
 * in the program specification's properties), resolves the sink writer, builds the transform
 * executor for the portion of the phase this task runs, and registers an error writer for every
 * transform stage that declares an error dataset.
 *
 * @param context the task context; also supplies the Hadoop job context and its configuration
 * @param metrics metrics handle passed on to the transform executor factory
 * @throws Exception if any step of the setup fails
 */
public TransformRunner(MapReduceTaskContext<Object, Object> context, Metrics metrics) throws Exception {
    JobContext hadoopContext = context.getHadoopContext();
    Configuration conf = hadoopContext.getConfiguration();

    // the sink writer decides whether output goes to a single output or to multiple outputs
    Map<String, String> specProperties = context.getSpecification().getProperties();
    BatchPhaseSpec spec = GSON.fromJson(specProperties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    this.outputWriter = getSinkWriter(context, spec.getPhase(), conf);

    // everything below prepares the plugins and the TransformExecutor
    PipelinePluginInstantiator instantiator = new PipelinePluginInstantiator(context, spec);
    // stage name -> runtime args for that stage, serialized as JSON in the job configuration
    String runtimeArgsJson = conf.get(ETLMapReduce.RUNTIME_ARGS_KEY);
    Map<String, Map<String, String>> stageRuntimeArgs = GSON.fromJson(runtimeArgsJson,
            ETLMapReduce.RUNTIME_ARGS_TYPE);

    PipelinePhase phaseToRun = spec.getPhase();
    Set<StageInfo> aggregatorStages = phaseToRun.getStagesOfType(BatchAggregator.PLUGIN_TYPE);
    if (!aggregatorStages.isEmpty()) {
        String firstAggregator = aggregatorStages.iterator().next().getName();
        // mapper: run from the sources up to the aggregator;
        // otherwise (reducer): run from the aggregator down to the sinks
        phaseToRun = (hadoopContext instanceof Mapper.Context)
                ? phaseToRun.subsetTo(ImmutableSet.of(firstAggregator))
                : phaseToRun.subsetFrom(ImmutableSet.of(firstAggregator));
    }
    TransformExecutorFactory<KeyValue<KEY, VALUE>> executorFactory = new MapReduceTransformExecutorFactory<>(
            context, instantiator, metrics, stageRuntimeArgs);
    this.transformExecutor = executorFactory.create(phaseToRun);

    // error dataset bookkeeping: the set starts out empty, the map gets one writer per
    // transform stage that names an error dataset
    this.transformsWithoutErrorDataset = new HashSet<>();
    this.transformErrorSinkMap = new HashMap<>();
    for (StageInfo stage : spec.getPhase().getStagesOfType(Transform.PLUGIN_TYPE)) {
        String errorDataset = stage.getErrorDatasetName();
        if (errorDataset == null) {
            continue;
        }
        transformErrorSinkMap.put(stage.getName(), new ErrorOutputWriter<>(context, errorDataset));
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java

License:Apache License

/**
 * Decodes the input splits that were serialized into the job configuration under {@code SPLITS}.
 *
 * <p>The encoded value is expected to start with an int count followed by that many serialized
 * {@code DataSetInputSplit} instances.
 *
 * @param context job context whose configuration holds the encoded splits
 * @return the decoded splits, in the order they were encoded
 * @throws IOException if the configuration has no value for {@code SPLITS}
 */
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    String encodedSplits = context.getConfiguration().get(SPLITS);
    if (encodedSplits == null) {
        throw new IOException("No input splits available from job configuration.");
    }

    ByteArrayDataInput in = ByteStreams.newDataInput(Bytes.toBytesBinary(encodedSplits));
    int count = in.readInt();
    List<InputSplit> decoded = new ArrayList<>(count);
    for (int remaining = count; remaining > 0; remaining--) {
        DataSetInputSplit split = new DataSetInputSplit();
        split.readFields(in);
        decoded.add(split);
    }
    return decoded;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java

License:Apache License

/**
 * Verifies that the job configuration carries both {@code DATASET_NAME} and {@code DATASET_ARGS}.
 *
 * @param context job context whose configuration is inspected
 * @throws IOException if either of the two entries is absent
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    // DATASET_ARGS is only looked up when DATASET_NAME is present
    boolean configured = conf.get(DATASET_NAME) != null && conf.get(DATASET_ARGS) != null;
    if (!configured) {
        throw new IOException("Dataset configurations are missing in the job configuration");
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java

License:Apache License

/**
 * Wraps every {@code Split} from the configured input selection in a {@code DataSetInputSplit}.
 *
 * @param context job context whose configuration backs the {@code MapReduceContextConfig}
 * @return one input split per selected {@code Split}, in iteration order
 */
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    List<InputSplit> wrapped = new ArrayList<>();
    for (Split selected : new MapReduceContextConfig(context.getConfiguration()).getInputSelection()) {
        InputSplit inputSplit = new DataSetInputSplit(selected);
        wrapped.add(inputSplit);
    }
    return wrapped;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java

License:Apache License

/**
 * Computes the splits of every input registered through {@code MultipleInputs} and tags each one
 * with the input it came from.
 *
 * <p>For each named input, a copy of the job is created, the input's own input format
 * configuration is applied to the copy, the input format is instantiated reflectively, and each
 * split it returns is wrapped in a {@code TaggedInputSplit}.
 *
 * @param job job context whose configuration holds the input map
 * @return the tagged splits of all inputs
 * @throws NullPointerException if an input's input format class cannot be loaded
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        Job jobCopy = new Job(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific for this input onto the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        // the template needs a %s placeholder; without it the class name is only appended
        // in brackets as an unmatched argument
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a TaggedInputSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new TaggedInputSplit(inputName, split, confCopy,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License:Apache License

/**
 * Computes the splits of every input registered through {@code MultipleInputs} and tags each one
 * with the input it came from.
 *
 * <p>For each named input, a copy of the job is created, the input's own input format
 * configuration is applied to the copy, the input format is instantiated reflectively, and each
 * split it returns is wrapped in a {@code MultiInputTaggedSplit}.
 *
 * @param job job context whose configuration holds the input map
 * @return the tagged splits of all inputs
 * @throws NullPointerException if an input's input format class cannot be loaded
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        Job jobCopy = new Job(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific for this input onto the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        // the template needs a %s placeholder; without it the class name is only appended
        // in brackets as an unmatched argument
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());

        InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);
        // some input formats need a job ID in order to compute splits; one is derived
        // from the input name and set on the copy
        jobCopy.setJobID(new JobID(inputName, inputName.hashCode()));

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a MultiInputTaggedSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new MultiInputTaggedSplit(split, confCopy, inputName,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Returns the names stored under {@code MULTIPLE_OUTPUTS} in the job configuration.
 *
 * <p>The configured value is split on single spaces and empty pieces are dropped; a missing
 * entry is treated as the empty string.
 *
 * @param job job context whose configuration is read
 * @return a new list of the named outputs
 */
static List<String> getNamedOutputsList(JobContext job) {
    String spaceSeparatedNames = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    return Lists.newArrayList(Splitter.on(" ").omitEmptyStrings().split(spaceSeparatedNames));
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Looks up the output format class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + FORMAT}, or {@code null}
 *         (the default passed to the lookup) if nothing is configured
 */
static Class<? extends OutputFormat> getNamedOutputFormatClass(JobContext job, String namedOutput) {
    String formatKey = MO_PREFIX + namedOutput + FORMAT;
    return job.getConfiguration().getClass(formatKey, null, OutputFormat.class);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Looks up the key class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + KEY}, or {@code null}
 *         (the default passed to the lookup) if nothing is configured
 */
private static Class<?> getNamedOutputKeyClass(JobContext job, String namedOutput) {
    String keyClassProperty = MO_PREFIX + namedOutput + KEY;
    return job.getConfiguration().getClass(keyClassProperty, null, Object.class);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Looks up the value class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + VALUE}, or {@code null}
 *         (the default passed to the lookup) if nothing is configured
 */
private static Class<?> getNamedOutputValueClass(JobContext job, String namedOutput) {
    String valueClassProperty = MO_PREFIX + namedOutput + VALUE;
    return job.getConfiguration().getClass(valueClassProperty, null, Object.class);
}