Example usage for org.apache.hadoop.mapreduce Job getConfiguration

List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getConfiguration.

Prototype

public Configuration getConfiguration() 

Source Link

Document

Return the configuration for the job.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingInputFormat.java

License:Apache License

/**
 * Computes the input splits of every input registered in the job configuration.
 *
 * <p>For each entry returned by {@code MultipleInputs.getInputMap}, a separate {@link Job} is created
 * from the job's configuration, the input-specific InputFormat configuration is applied onto it, the
 * InputFormat class is loaded and instantiated reflectively, and every split it produces is wrapped in a
 * {@link TaggedInputSplit} carrying the input name, the InputFormat class and the mapper class name.
 *
 * @param job the job context whose configuration holds the registered inputs
 * @return the tagged splits of all registered inputs
 * @throws IOException propagated from creating the per-input job or from the delegate InputFormat
 * @throws InterruptedException propagated from the delegate InputFormat
 * @throws NullPointerException if the InputFormat class of an input cannot be loaded
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        // Job.getInstance is the non-deprecated replacement for the Job(Configuration) constructor;
        // the per-input settings below are applied to this job's own configuration object.
        Job jobCopy = Job.getInstance(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific for this input onto the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        // the %s placeholder is required for the class name to be substituted into the message
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a TaggedInputSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new TaggedInputSplit(inputName, split, confCopy,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License:Apache License

/**
 * Computes the input splits of every input registered in the job configuration.
 *
 * <p>For each entry returned by {@code MultipleInputs.getInputMap}, a separate {@link Job} is created
 * from the job's configuration, the input-specific InputFormat configuration is applied onto it, the
 * InputFormat class is loaded and instantiated reflectively, and every split it produces is wrapped in a
 * {@link MultiInputTaggedSplit} carrying the input name, the InputFormat class and the mapper class name.
 *
 * @param job the job context whose configuration holds the registered inputs
 * @return the tagged splits of all registered inputs
 * @throws IOException propagated from creating the per-input job or from the delegate InputFormat
 * @throws InterruptedException propagated from the delegate InputFormat
 * @throws NullPointerException if the InputFormat class of an input cannot be loaded
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    List<InputSplit> splits = new ArrayList<>();
    Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

    for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
        String inputName = mapperInputEntry.getKey();
        MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
        String mapperClassName = mapperInput.getMapperClassName();
        // Job.getInstance is the non-deprecated replacement for the Job(Configuration) constructor;
        // the per-input settings below are applied to this job's own configuration object.
        Job jobCopy = Job.getInstance(job.getConfiguration());
        Configuration confCopy = jobCopy.getConfiguration();

        // set configuration specific for this input onto the jobCopy
        ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

        Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
        // the %s placeholder is required for the class name to be substituted into the message
        Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                mapperInput.getInputFormatClassName());

        InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);
        // Some input formats need a job ID to compute their splits, so derive one from the input name.
        jobCopy.setJobID(new JobID(inputName, inputName.hashCode()));

        // Get splits for each input path and tag with InputFormat
        // and Mapper types by wrapping in a MultiInputTaggedSplit.
        List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit split : formatSplits) {
            splits.add(new MultiInputTaggedSplit(split, confCopy, inputName,
                    mapperInput.getInputFormatConfiguration(), inputFormat.getClass(), mapperClassName));
        }
    }
    return splits;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputs.java

License:Apache License

/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job}/*from   w  w w  .  jav  a2s  . c  o  m*/
 * @param namedInput name of the input
 * @param inputFormatClass the name of the InputFormat class to be used for this input
 * @param inputConfigs the configurations to be used for this input
 * @param mapperClass {@link Mapper} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInput(Job job, String namedInput, String inputFormatClass,
        Map<String, String> inputConfigs, Class<? extends Mapper> mapperClass) {
    Configuration conf = job.getConfiguration();

    Map<String, MapperInput> map = getInputMap(conf);
    // this shouldn't happen, because it is already protected against in BasicMapReduceContext#addInput
    if (map.containsKey(namedInput)) {
        throw new IllegalArgumentException("Input already configured: " + namedInput);
    }
    map.put(namedInput, new MapperInput(inputFormatClass, inputConfigs, mapperClass));
    conf.set(INPUT_CONFIGS, GSON.toJson(map));

    job.setInputFormatClass(DelegatingInputFormat.class);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputsTest.java

License:Apache License

/**
 * Checks that inputs registered through {@code MultipleInputs.addInput} can be read back from the job
 * configuration with {@code MultipleInputs.getInputMap}, first with one input and then with two.
 */
@Test
public void testConfigurations() throws IOException, ClassNotFoundException {
    Job job = Job.getInstance();

    // --- a single registered input ---
    String firstName = "inputName1";
    String firstFormat = TextInputFormat.class.getName();
    Map<String, String> firstConfigs = ImmutableMap.of("key1", "val1", "key2", "val2");
    MultipleInputs.addInput(job, firstName, firstFormat, firstConfigs, job.getMapperClass());

    Map<String, MultipleInputs.MapperInput> inputs = MultipleInputs.getInputMap(job.getConfiguration());

    Assert.assertEquals(1, inputs.size());
    Assert.assertEquals(firstName, Iterables.getOnlyElement(inputs.keySet()));

    MultipleInputs.MapperInput onlyInput = Iterables.getOnlyElement(inputs.values());
    Assert.assertEquals(firstFormat, onlyInput.getInputFormatClassName());
    Assert.assertEquals(firstConfigs, onlyInput.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), onlyInput.getMapperClassName());

    // registering an input switches the job's input format to the delegating one
    Assert.assertEquals(DelegatingInputFormat.class, job.getInputFormatClass());

    // --- two registered inputs ---
    String secondName = "inputName2";
    String secondFormat = TextInputFormat.class.getName();
    Map<String, String> secondConfigs = ImmutableMap.of(
            "some_key1", "some_val1",
            "some_key2", "some_val2");
    MultipleInputs.addInput(job, secondName, secondFormat, secondConfigs, CustomMapper.class);

    inputs = MultipleInputs.getInputMap(job.getConfiguration());
    Assert.assertEquals(2, inputs.size());

    // the first input must be unchanged by the second registration
    MultipleInputs.MapperInput firstInput = inputs.get(firstName);
    Assert.assertEquals(firstFormat, firstInput.getInputFormatClassName());
    Assert.assertEquals(firstConfigs, firstInput.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), firstInput.getMapperClassName());

    MultipleInputs.MapperInput secondInput = inputs.get(secondName);
    Assert.assertEquals(secondFormat, secondInput.getInputFormatClassName());
    Assert.assertEquals(secondConfigs, secondInput.getInputFormatConfiguration());
    Class<?> secondMapperClass = job.getConfiguration().getClassByName(secondInput.getMapperClassName());
    Assert.assertEquals(CustomMapper.class, secondMapperClass);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 *
 * <p>The name is validated with {@code assertValidName} and {@code checkNamedOutputName}, then
 * appended to the {@code MULTIPLE_OUTPUTS} list in the job configuration. The output's format class
 * name, key class and value class are stored under keys derived from the output name, and the given
 * output configurations are stored under the prefix computed by {@code computePrefixName}.
 *
 * @param job               job to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only (alphanumeric)
 * @param outputFormatClass name of the OutputFormat class.
 * @param keyClass          key class
 * @param valueClass        value class
 * @param outputConfigs     configurations for the output
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, String outputFormatClass, Class<?> keyClass,
        Class<?> valueClass, Map<String, String> outputConfigs) {
    assertValidName(namedOutput);
    checkNamedOutputName(namedOutput, getNamedOutputsList(job), false);

    Configuration jobConf = job.getConfiguration();

    // Append the new name to the space-separated list of named outputs.
    String existingOutputs = jobConf.get(MULTIPLE_OUTPUTS, "");
    jobConf.set(MULTIPLE_OUTPUTS, existingOutputs + " " + namedOutput);

    // Per-output settings share a common key prefix.
    String outputKeyPrefix = MO_PREFIX + namedOutput;
    jobConf.set(outputKeyPrefix + FORMAT, outputFormatClass);
    jobConf.setClass(outputKeyPrefix + KEY, keyClass, Object.class);
    jobConf.setClass(outputKeyPrefix + VALUE, valueClass, Object.class);

    String configPrefix = computePrefixName(namedOutput);
    ConfigurationUtil.setNamedConfigurations(jobConf, configPrefix, outputConfigs);
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Creates a task attempt context for the given named output.
 *
 * <p>The returned context uses the configuration of the job built by {@code getNamedJob}, keeps the
 * task attempt id of the given context, and reports status through a {@link WrappedStatusReporter}
 * wrapping the given context.
 *
 * @param context the current task attempt context
 * @param namedOutput name of the output to build the context for
 * @return a new task attempt context for the named output
 * @throws IOException propagated from {@code getNamedJob}
 */
static TaskAttemptContext getNamedTaskContext(TaskAttemptContext context, String namedOutput)
        throws IOException {
    final Job namedJob = getNamedJob(context, namedOutput);
    return new TaskAttemptContextImpl(
            namedJob.getConfiguration(),
            context.getTaskAttemptID(),
            new WrappedStatusReporter(context));
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Creates a job context for the given named output.
 *
 * <p>The returned context uses the configuration and job id of the job built by {@code getNamedJob}.
 *
 * @param context the current job context
 * @param namedOutput name of the output to build the context for
 * @return a new job context for the named output
 * @throws IOException propagated from {@code getNamedJob}
 */
static JobContext getNamedJobContext(JobContext context, String namedOutput) throws IOException {
    final Job namedJob = getNamedJob(context, namedOutput);
    return new JobContextImpl(
            namedJob.getConfiguration(),
            namedJob.getJobID());
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Builds a {@link Job} configured for the given named output.
 *
 * <p>The job is created from the context's configuration, its output format, key and value classes
 * are set to those registered for the named output, and the named configurations stored under the
 * output's prefix are applied onto the job's configuration.
 *
 * @param context the current job context
 * @param namedOutput name of the output to configure the job for
 * @return a job configured for the named output
 * @throws IOException propagated from creating the job or resolving the named output's classes
 */
private static Job getNamedJob(JobContext context, String namedOutput) throws IOException {
    // Instantiating the record writer through a Job is what allows arbitrary output formats
    // to be supported here.
    Job namedJob = Job.getInstance(context.getConfiguration());
    namedJob.setOutputFormatClass(getNamedOutputFormatClass(context, namedOutput));
    namedJob.setOutputKeyClass(getNamedOutputKeyClass(context, namedOutput));
    namedJob.setOutputValueClass(getNamedOutputValueClass(context, namedOutput));

    // Overlay the settings that were registered specifically for this named output.
    String configPrefix = computePrefixName(namedOutput);
    Map<String, String> outputSettings =
            ConfigurationUtil.getNamedConfigurations(context.getConfiguration(), configPrefix);
    Configuration namedConf = namedJob.getConfiguration();
    ConfigurationUtil.setAll(outputSettings, namedConf);

    return namedJob;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java

License:Apache License

/**
 * Sets an OutputFormat class as the root OutputFormat for the Hadoop job.
 *
 * <p>The class name is stored in the job configuration under {@code ROOT_OUTPUT_FORMAT}, and every
 * entry of the given output configuration is copied into the job configuration as-is.
 *
 * @param job the job on which to set the OutputFormat class
 * @param outputFormatClass the class to set as the root OutputFormat for the job
 * @param outputConfig the configuration to set for the specified OutputFormat
 */
public static void setRootOutputFormat(Job job, String outputFormatClass, Map<String, String> outputConfig) {
    job.getConfiguration().set(ROOT_OUTPUT_FORMAT, outputFormatClass);

    // Copy each output-format setting into the job configuration.
    for (Map.Entry<String, String> setting : outputConfig.entrySet()) {
        String settingKey = setting.getKey();
        String settingValue = setting.getValue();
        job.getConfiguration().set(settingKey, settingValue);
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java

License:Apache License

/**
 * Creates a task attempt context whose configuration carries the given output name and a
 * job-specific output path.
 *
 * <p>A separate {@link Job} is created from the context's configuration so that the output name and
 * output path are set on that job's configuration. The returned context keeps the task attempt id of
 * the given context.
 *
 * @param context the current task attempt context
 * @param newOutputName the output name to set
 * @return a new task attempt context configured with the output name and job-specific output path
 * @throws IOException propagated from creating the job or from setting the output path
 */
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    // Job.getInstance is the non-deprecated replacement for the Job(Configuration) constructor.
    Job job = Job.getInstance(context.getConfiguration());
    DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = DynamicPartitioningOutputFormat
            .createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}