List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
/**
 * Returns the {@link Configuration} held by this job context.
 *
 * @return the configuration object for the job
 */
public Configuration getConfiguration();
From source file:co.cask.cdap.etl.batch.mapreduce.TransformRunner.java
License:Apache License
/**
 * Sets up everything a single map or reduce task needs to run its part of the pipeline phase: the sink
 * writer, the transform executor, and the error writers for transforms that declare an error dataset.
 *
 * @param context the MapReduce task context; its Hadoop context supplies the job configuration
 * @param metrics metrics handle passed through to the transform executor factory
 * @throws Exception if any of the setup steps fails
 */
public TransformRunner(MapReduceTaskContext<Object, Object> context, Metrics metrics) throws Exception {
  JobContext hadoopContext = context.getHadoopContext();
  Configuration conf = hadoopContext.getConfiguration();

  // The phase spec is stored as JSON in the program specification properties; the sink writer
  // (single output vs. multiple outputs) is derived from it.
  Map<String, String> specProperties = context.getSpecification().getProperties();
  String phaseSpecJson = specProperties.get(Constants.PIPELINEID);
  BatchPhaseSpec phaseSpec = GSON.fromJson(phaseSpecJson, BatchPhaseSpec.class);
  this.outputWriter = getSinkWriter(context, phaseSpec.getPhase(), conf);

  // Plugins are instantiated through this helper when the executor is created below.
  PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(context, phaseSpec);

  // stage name -> runtime args for that stage
  String runtimeArgsJson = conf.get(ETLMapReduce.RUNTIME_ARGS_KEY);
  Map<String, Map<String, String>> runtimeArgs = GSON.fromJson(runtimeArgsJson, ETLMapReduce.RUNTIME_ARGS_TYPE);

  PipelinePhase phase = phaseSpec.getPhase();
  Set<StageInfo> aggregators = phase.getStagesOfType(BatchAggregator.PLUGIN_TYPE);
  if (!aggregators.isEmpty()) {
    // Only the first aggregator is used as the split point.
    String aggregatorName = aggregators.iterator().next().getName();
    ImmutableSet<String> boundary = ImmutableSet.of(aggregatorName);
    // Mapper: sources up to the aggregator. Reducer: aggregator down to the sinks.
    phase = (hadoopContext instanceof Mapper.Context)
      ? phase.subsetTo(boundary)
      : phase.subsetFrom(boundary);
  }

  TransformExecutorFactory<KeyValue<KEY, VALUE>> executorFactory = new MapReduceTransformExecutorFactory<>(
    context, pluginInstantiator, metrics, runtimeArgs);
  this.transformExecutor = executorFactory.create(phase);

  // Error dataset bookkeeping. Note this walks the full phase from the spec, not the subset above.
  this.transformsWithoutErrorDataset = new HashSet<>();
  this.transformErrorSinkMap = new HashMap<>();
  for (StageInfo stage : phaseSpec.getPhase().getStagesOfType(Transform.PLUGIN_TYPE)) {
    String errorDataset = stage.getErrorDatasetName();
    if (errorDataset == null) {
      continue;
    }
    transformErrorSinkMap.put(stage.getName(), new ErrorOutputWriter<>(context, errorDataset));
  }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException { // Decode splits from Configuration String splitsConf = context.getConfiguration().get(SPLITS); if (splitsConf == null) { throw new IOException("No input splits available from job configuration."); }//w w w .j a va2s . c o m ByteArrayDataInput dataInput = ByteStreams.newDataInput(Bytes.toBytesBinary(splitsConf)); int size = dataInput.readInt(); List<InputSplit> splits = new ArrayList<>(size); for (int i = 0; i < size; i++) { DataSetInputSplit inputSplit = new DataSetInputSplit(); inputSplit.readFields(dataInput); splits.add(inputSplit); } return splits; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { Configuration hConf = context.getConfiguration(); if (hConf.get(DATASET_NAME) == null || hConf.get(DATASET_ARGS) == null) { throw new IOException("Dataset configurations are missing in the job configuration"); }//ww w. j a v a 2 s. c o m }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException { MapReduceContextConfig mrContextConfig = new MapReduceContextConfig(context.getConfiguration()); List<Split> splits = mrContextConfig.getInputSelection(); List<InputSplit> list = new ArrayList<>(); for (Split split : splits) { list.add(new DataSetInputSplit(split)); }// www.ja v a2 s . c om return list; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java
License:Apache License
@SuppressWarnings("unchecked") public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException { List<InputSplit> splits = new ArrayList<>(); Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration()); for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) { String inputName = mapperInputEntry.getKey(); MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue(); String mapperClassName = mapperInput.getMapperClassName(); Job jobCopy = new Job(job.getConfiguration()); Configuration confCopy = jobCopy.getConfiguration(); // set configuration specific for this input onto the jobCopy ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy); Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName()); Preconditions.checkNotNull(inputFormatClass, "Class could not be found: ", mapperInput.getInputFormatClassName()); InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy); // Get splits for each input path and tag with InputFormat // and Mapper types by wrapping in a TaggedInputSplit. List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy); for (InputSplit split : formatSplits) { splits.add(new TaggedInputSplit(inputName, split, confCopy, mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName)); }/*from w w w. ja v a 2s . c om*/ } return splits; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java
License:Apache License
@SuppressWarnings("unchecked") public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException { List<InputSplit> splits = new ArrayList<>(); Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration()); for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) { String inputName = mapperInputEntry.getKey(); MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue(); String mapperClassName = mapperInput.getMapperClassName(); Job jobCopy = new Job(job.getConfiguration()); Configuration confCopy = jobCopy.getConfiguration(); // set configuration specific for this input onto the jobCopy ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy); Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName()); Preconditions.checkNotNull(inputFormatClass, "Class could not be found: ", mapperInput.getInputFormatClassName()); InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy); //some input format need a jobId to getSplits jobCopy.setJobID(new JobID(inputName, inputName.hashCode())); // Get splits for each input path and tag with InputFormat // and Mapper types by wrapping in a MultiInputTaggedSplit. List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy); for (InputSplit split : formatSplits) { splits.add(new MultiInputTaggedSplit(split, confCopy, inputName, mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName)); }/* w w w .jav a 2s.c o m*/ } return splits; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Reads the space-separated named outputs stored in the job configuration under {@code MULTIPLE_OUTPUTS}.
 *
 * @param job job context whose configuration is read
 * @return a new list of the non-empty names; empty when the property is not set
 */
static List<String> getNamedOutputsList(JobContext job) {
  String joinedNames = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
  List<String> names = Lists.newArrayList();
  for (String name : Splitter.on(" ").omitEmptyStrings().split(joinedNames)) {
    names.add(name);
  }
  return names;
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Looks up the {@link OutputFormat} class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + FORMAT}, or {@code null} if not set
 */
static Class<? extends OutputFormat> getNamedOutputFormatClass(JobContext job, String namedOutput) {
  String formatKey = MO_PREFIX + namedOutput + FORMAT;
  Configuration conf = job.getConfiguration();
  return conf.getClass(formatKey, null, OutputFormat.class);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Looks up the key class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + KEY}, or {@code null} if not set
 */
private static Class<?> getNamedOutputKeyClass(JobContext job, String namedOutput) {
  String keyClassProperty = MO_PREFIX + namedOutput + KEY;
  Configuration conf = job.getConfiguration();
  return conf.getClass(keyClassProperty, null, Object.class);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Looks up the value class configured for a named output.
 *
 * @param job job context whose configuration is read
 * @param namedOutput name of the output
 * @return the class stored under {@code MO_PREFIX + namedOutput + VALUE}, or {@code null} if not set
 */
private static Class<?> getNamedOutputValueClass(JobContext job, String namedOutput) {
  String valueClassProperty = MO_PREFIX + namedOutput + VALUE;
  Configuration conf = job.getConfiguration();
  return conf.getClass(valueClassProperty, null, Object.class);
}