Example usage for org.apache.hadoop.mapreduce.Job#getConfiguration()

Introduction

This page collects example usages of the getConfiguration() method of org.apache.hadoop.mapreduce.Job.

Prototype

public Configuration getConfiguration()

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingInputFormat.java

License:Apache License

/**
 * Computes the splits of every configured input path by delegating to the input format instance registered for
 * that path.
 * <p>
 * For each (path, input format instance file) pair, a copy of the job is pointed at that single path, the input
 * format instance is loaded, the input-specific context is applied to the copy's configuration, and every split
 * the input format produces is wrapped in a {@link TaggedInputSplit} together with the input format file and the
 * mapper file mapped to the same path.
 *
 * @param job
 *          the job context whose configuration describes the inputs
 * @return all tagged splits, in the iteration order of the input format map
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    // Working copy of the job: its input path is overwritten on every iteration
    Job singlePathJob = new Job(conf);
    List<InputSplit> taggedSplits = new ArrayList<InputSplit>();

    Map<Path, String> formatFileByPath = PangoolMultipleInputs.getInputFormatMap(job);
    Map<Path, String> mapperFileByPath = PangoolMultipleInputs.getInputProcessorFileMap(job);

    for (Map.Entry<Path, String> pathAndFormat : formatFileByPath.entrySet()) {
        Path inputPath = pathAndFormat.getKey();
        String formatFile = pathAndFormat.getValue();

        FileInputFormat.setInputPaths(singlePathJob, inputPath);
        InputFormat delegate = InstancesDistributor.loadInstance(conf, InputFormat.class, formatFile, true);
        PangoolMultipleInputs.setSpecificInputContext(singlePathJob.getConfiguration(), formatFile);

        List<InputSplit> rawSplits = delegate.getSplits(singlePathJob);
        for (InputSplit rawSplit : rawSplits) {
            taggedSplits.add(new TaggedInputSplit(rawSplit, conf, formatFile, mapperFileByPath.get(inputPath)));
        }
    }

    return taggedSplits;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

/**
 * Add a {@link Path} with a custom {@link InputFormat} and {@link Mapper} to the list of inputs for the map-reduce
 * job. Returns the instance files created.
 * /* w  ww .  ja va 2s.c  om*/
 * @param job
 *          The {@link Job}
 * @param path
 *          {@link Path} to be added to the list of inputs for the job
 * @param inputFormat
 *          {@link InputFormat} class to use for this path
 * @param mapperInstance
 *          {@link Mapper} instance to use
 * @throws IOException
 * @throws FileNotFoundException
 */
public static Set<String> addInputPath(Job job, Path path, InputFormat inputFormat, Mapper mapperInstance,
        Map<String, String> specificContext) throws FileNotFoundException, IOException {

    Set<String> instanceFiles = new HashSet<String>();
    // Serialize the Mapper instance
    String uniqueNameMapper = UUID.randomUUID().toString() + '.' + "mapper.dat";
    try {
        InstancesDistributor.distribute(mapperInstance, uniqueNameMapper, job.getConfiguration());
        instanceFiles.add(uniqueNameMapper);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    // Serialize the Input Format
    String uniqueNameInputFormat = UUID.randomUUID().toString() + '.' + "inputFormat.dat";
    try {
        InstancesDistributor.distribute(inputFormat, uniqueNameInputFormat, job.getConfiguration());
        instanceFiles.add(uniqueNameInputFormat);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    for (Map.Entry<String, String> contextKeyValue : specificContext.entrySet()) {
        PangoolMultipleInputs.addInputContext(job, uniqueNameInputFormat, contextKeyValue.getKey(),
                contextKeyValue.getValue());
    }
    addInputPath(job, path, uniqueNameInputFormat);
    Configuration conf = job.getConfiguration();
    String mapperMapping = path.toString() + ";" + uniqueNameMapper;
    String mappers = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF);
    conf.set(PANGOOL_INPUT_DIR_MAPPERS_CONF, mappers == null ? mapperMapping : mappers + "," + mapperMapping);
    job.setMapperClass(DelegatingMapper.class);
    return instanceFiles;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

private static void addInputPath(Job job, Path path, String inputFormatInstance) {
    /*/*from w ww .ja  v a  2s .co  m*/
     * Only internal -> not allowed to add inputs without associated InputProcessor files
     */
    String inputFormatMapping = path.toString() + ";" + inputFormatInstance;
    Configuration conf = job.getConfiguration();
    String inputFormats = conf.get(PANGOOL_INPUT_DIR_FORMATS_CONF);
    conf.set(PANGOOL_INPUT_DIR_FORMATS_CONF,
            inputFormats == null ? inputFormatMapping : inputFormats + "," + inputFormatMapping);

    job.setInputFormatClass(DelegatingInputFormat.class);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

/**
 * Specific (key, value) configurations for each Input. Some Input Formats read specific configuration values and act
 * based on them./*from  w w  w. ja v  a2 s . c  o  m*/
 */
public static void addInputContext(Job job, String inputName, String key, String value) {
    // Check that this named output has been configured before
    Configuration conf = job.getConfiguration();
    // Add specific configuration
    conf.set(MI_PREFIX + inputName + CONF + "." + key, value);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job. Returns the instance file that has been created.
 * <p>
 * After the name is validated, the output format instance is distributed under a freshly generated unique file
 * name, the name is appended to the list of named outputs, and the instance file plus the key and value classes
 * are recorded in the job configuration under keys derived from the output name.
 *
 * @param job
 *          the job to configure
 * @param namedOutput
 *          the name of the output being added
 * @param outputFormat
 *          the output format instance to distribute for this output
 * @param keyClass
 *          key class recorded for this output
 * @param valueClass
 *          value class recorded for this output
 * @return the unique name of the instance file holding the output format
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static String addNamedOutput(Job job, String namedOutput, OutputFormat outputFormat, Class<?> keyClass,
        Class<?> valueClass) throws FileNotFoundException, IOException, URISyntaxException {
    checkNamedOutputName(job, namedOutput, true);

    Configuration jobConf = job.getConfiguration();
    String instanceFile = UUID.randomUUID().toString() + '.' + "out-format.dat";
    InstancesDistributor.distribute(outputFormat, instanceFile, jobConf);

    // Append the new name to the space-separated list of named outputs
    String registeredOutputs = jobConf.get(MULTIPLE_OUTPUTS, "");
    jobConf.set(MULTIPLE_OUTPUTS, registeredOutputs + " " + namedOutput);

    // Per-output settings share a prefix built from the output name
    String outputPrefix = MO_PREFIX + namedOutput;
    jobConf.set(outputPrefix + FORMAT_INSTANCE_FILE, instanceFile);
    jobConf.setClass(outputPrefix + KEY, keyClass, Object.class);
    jobConf.setClass(outputPrefix + VALUE, valueClass, Object.class);

    return instanceFile;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Added this method for allowing specific (key, value) configurations for each Output. Some Output Formats read
 * specific configuration values and act based on them.
 * /*  ww  w  . j av  a 2 s .c o m*/
 * @param namedOutput
 * @param key
 * @param value
 */
public static void addNamedOutputContext(Job job, String namedOutput, String key, String value) {
    // Check that this named output has been configured before
    Configuration conf = job.getConfiguration();
    // Add specific configuration
    conf.set(MO_PREFIX + namedOutput + CONF + "." + key, value);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Iterates over the Configuration and sets the specific context found for the namedOutput in the Job instance.
 * Exposed so that it can be unit tested. The specific context is configured through
 * {@link #addNamedOutputContext(Job, String, String, String)}.
 * <p>
 * Every entry of {@code conf} whose key starts with {@code MO_PREFIX + namedOutput + CONF + "."} is copied into
 * the job configuration with that prefix stripped from the key. Keys that merely begin with
 * {@code MO_PREFIX + namedOutput + CONF} without the separating dot are ignored.
 *
 * @param conf
 *          The configuration that may contain specific context for the named output
 * @param job
 *          The Job where we will set the specific context
 * @param namedOutput
 *          The named output
 */
public static void setSpecificNamedOutputContext(Configuration conf, Job job, String namedOutput) {
    // The trailing "." is part of the prefix so that the match and the substring offset always agree;
    // matching without it would mis-slice (or overrun) keys that do not continue with the separator.
    String contextPrefix = MO_PREFIX + namedOutput + CONF + ".";
    for (Map.Entry<String, String> entry : conf) {
        String confKey = entry.getKey();
        if (confKey.startsWith(contextPrefix)) {
            // Specific context key, value found
            String contextKey = confKey.substring(contextPrefix.length());
            job.getConfiguration().set(contextKey, entry.getValue());
        }
    }
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Enables or disables counters for the named outputs.
 * /*from w  ww .  ja  v a 2  s . c  om*/
 * The counters group is the {@link PangoolMultipleOutputs} class name. The names of the counters are the same as the
 * named outputs. These counters count the number records written to each output name. By default these counters are
 * disabled.
 * 
 * @param job
 *          job to enable counters
 * @param enabled
 *          indicates if the counters will be enabled or not.
 */
public static void setCountersEnabled(Job job, boolean enabled) {
    job.getConfiguration().setBoolean(COUNTERS_ENABLED, enabled);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Returns the {@link RecordWriter} for the given base file name, building and caching it on first request.
 * <p>
 * On a cache miss a dedicated {@link Job} is created from the task configuration, given the key and value classes
 * of the named output and any output-specific context, and turned into a task attempt context whose output
 * directory is redirected to {@code <base dir of the main output committer>/<baseFileName>}. The distributed
 * output format instance is then loaded, its output specs are checked, and its committer and record writer are
 * stored in the cached output context. When counters are enabled the writer is wrapped so it can increment them.
 *
 * @param baseFileName
 *          the named output / base file name the writer is requested for
 * @return the cached or newly created record writer
 * @throws IOException
 *           also used to wrap any exception raised while creating the task attempt context or the job context
 * @throws InterruptedException
 * @throws RuntimeException
 *           wrapping a {@link ClassNotFoundException} raised while resolving the main output format class
 */
public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {

    // Serve straight from the cache when this output was already set up
    OutputContext cached = outputContexts.get(baseFileName);
    if (cached != null) {
        return cached.recordWriter;
    }

    OutputContext created = new OutputContext();

    // The main output format is only needed to reach its committer, which knows the base work dir
    OutputFormat mainOutputFormat;
    try {
        mainOutputFormat = (OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                this.context.getConfiguration());
    } catch (ClassNotFoundException notFound) {
        throw new RuntimeException(notFound);
    }
    ProxyOutputCommitter baseOutputCommitter = (ProxyOutputCommitter) mainOutputFormat
            .getOutputCommitter(this.context);

    // The trick is to create a new Job for each output
    Job outputJob = new Job(this.context.getConfiguration());
    outputJob.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
    outputJob.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
    // Apply any context registered specifically for this output
    setSpecificNamedOutputContext(this.context.getConfiguration(), outputJob, baseFileName);

    TaskAttemptContext taskContext;
    try {
        taskContext = TaskAttemptContextFactory.get(outputJob.getConfiguration(),
                this.context.getTaskAttemptID());
    } catch (Exception failure) {
        throw new IOException(failure);
    }

    // Redirect the output dir of the new OutputFormat into the main output work path, so that everything is
    // discarded if the Job fails. Both the old property name and the Hadoop 2.0 one are set.
    Configuration outputConf = taskContext.getConfiguration();
    outputConf.set("mapred.output.dir", baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    outputConf.set("mapreduce.output.fileoutputformat.outputdir",
            baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    created.taskAttemptContext = taskContext;

    // Load the OutputFormat instance
    OutputFormat outputFormat = InstancesDistributor.loadInstance(outputConf, OutputFormat.class,
            getNamedOutputFormatInstanceFile(this.context, baseFileName), true);

    // A JobContext is required to meet the contract of the OutputFormat
    JobContext jobContext;
    try {
        jobContext = JobContextFactory.get(outputConf, taskContext.getJobID());
    } catch (Exception failure) {
        throw new IOException(failure);
    }
    created.jobContext = jobContext;

    // The contract of the OutputFormat is to check the output specs
    outputFormat.checkOutputSpecs(jobContext);
    // Keep the committer so it can be called later
    created.outputCommitter = outputFormat.getOutputCommitter(taskContext);
    // Keep the RecordWriter so it is cached together with its context
    created.recordWriter = outputFormat.getRecordWriter(taskContext);

    // With counters enabled, wrap the writer so that it increments them
    if (countersEnabled) {
        created.recordWriter = new RecordWriterWithCounter(created.recordWriter, baseFileName, this.context);
    }

    outputContexts.put(baseFileName, created);
    return created.recordWriter;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestPangoolMultipleOutputs.java

License:Apache License

/**
 * Verifies that a (key, value) pair registered as specific context for a named output is copied into the job
 * configuration under its plain key.
 */
@Test
public void testSpecificContext() throws IOException {
    Job job = new Job(new Configuration());
    Configuration jobConf = job.getConfiguration();

    // Register the context for output "foo" ...
    PangoolMultipleOutputs.addNamedOutputContext(job, "foo", "my.context.property", "myValue");
    // ... then resolve it back into the same job
    PangoolMultipleOutputs.setSpecificNamedOutputContext(jobConf, job, "foo");

    Assert.assertEquals("myValue", jobConf.get("my.context.property"));
}