Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job.getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
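
Before the real-world examples below, here is a minimal, self-contained sketch of the call. The job name and property key are purely illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        // getConfiguration() returns the live Configuration backing the job,
        // so properties set here are visible to the job once it is submitted.
        Job job = Job.getInstance(new Configuration(), "example-job");
        Configuration conf = job.getConfiguration();
        conf.set("example.custom.property", "some-value");
        System.out.println(conf.get("example.custom.property"));
    }
}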

Usage

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License: Apache License

private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
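
One note on this snippet: the Job(Configuration) constructor it uses is deprecated in current Hadoop releases; the equivalent, non-deprecated factory method is:

Job job = Job.getInstance(context.getConfiguration());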

From source file: co.cask.cdap.internal.app.runtime.batch.MapperWrapper.java

License: Apache License

/**
 * Wraps the mapper defined in the job with this {@link MapperWrapper} if it is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getMapperClass() as we don't need to load user class here
    Configuration conf = job.getConfiguration();
    String mapClass = conf.get(MRJobConfig.MAP_CLASS_ATTR, Mapper.class.getName());
    conf.set(MapperWrapper.ATTR_MAPPER_CLASS, mapClass);
    job.setMapperClass(MapperWrapper.class);
}
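
The wrapper relies on the original mapper class name surviving in the job configuration. For context, a hedged sketch of the read-back side inside the wrapper (the run() override and the error handling here are assumptions, not CDAP's actual code):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

@Override
public void run(Context context) throws IOException, InterruptedException {
    // Recover and instantiate the delegate mapper recorded by wrap().
    Configuration conf = context.getConfiguration();
    String delegateName = conf.get(MapperWrapper.ATTR_MAPPER_CLASS);
    try {
        Mapper delegate = (Mapper) ReflectionUtils.newInstance(conf.getClassByName(delegateName), conf);
        // ... delegate.run(context), typically after further setup
    } catch (ClassNotFoundException e) {
        throw new IllegalStateException("Wrapped mapper class not found: " + delegateName, e);
    }
}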

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);

    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence there is no need to create a new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace the user's Mapper & Reducer with our wrappers in the job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce containers processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar.
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from that.
            // We explicitly localize the mr-framework, but do not put it on the classpath.
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start a long-running tx to be used by the mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember the tx so that we can re-use it in the mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}
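
A detail worth calling out in the method above: the submission is deliberately non-blocking. Job.submit() hands the job to the cluster and returns immediately, whereas the more commonly seen Job.waitForCompletion(true) would block until the job finishes. A minimal contrast, for reference:

// Non-blocking, as used above; poll progress later, e.g. via job.isComplete().
job.submit();

// Blocking alternative: submit, wait, and print progress while running.
boolean succeeded = job.waitForCompletion(true);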

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Creates a MapReduce {@link Job} instance.
 *
 * @param hadoopTmpDir directory for the "hadoop.tmp.dir" configuration
 *///w  w  w. j a  v a2  s  .c  om
private Job createJob(File hadoopTmpDir) throws IOException {
    Job job = Job.getInstance(new Configuration(hConf));
    Configuration jobConf = job.getConfiguration();

    if (MapReduceTaskContextProvider.isLocal(jobConf)) {
        // Set the MR framework local directories inside the given tmp directory.
        // Setting "hadoop.tmp.dir" here has no effect due to Explore Service need to set "hadoop.tmp.dir"
        // as system property for Hive to work in local mode. The variable substitution of hadoop conf
        // gives system property the highest precedence.
        jobConf.set("mapreduce.cluster.local.dir", new File(hadoopTmpDir, "local").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.system.dir", new File(hadoopTmpDir, "system").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.staging.root.dir",
                new File(hadoopTmpDir, "staging").getAbsolutePath());
        jobConf.set("mapreduce.cluster.temp.dir", new File(hadoopTmpDir, "temp").getAbsolutePath());
    }

    if (UserGroupInformation.isSecurityEnabled()) {
        // When running in a secure cluster, this program runner runs in a YARN container and hence cannot
        // authenticate with the job history server.
        jobConf.unset("mapreduce.jobhistory.address");
        jobConf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        LOG.info("Running in secure mode; adding all user credentials: {}", credentials.getAllTokens());
        job.getCredentials().addAll(credentials);
    }
    return job;
}
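
The isLocal check above is CDAP-internal. With plain Hadoop APIs, the same distinction is usually drawn from the configured framework name; a sketch (the default-value assumption matches Hadoop 2.x):

// "mapreduce.framework.name" is "local" under the local job runner and
// "yarn" on a real cluster; "local" is the Hadoop 2.x default.
boolean isLocal = "local".equals(jobConf.get("mapreduce.framework.name", "local"));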

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Sets the configurations used for inputs.
 * Multiple mappers could be defined, so we first check that their output types are consistent.
 *
 * @return the TypeToken for one of the mappers (it doesn't matter which one, since we check that all of their output
 * key/value types are consistent). Returns null if the mapper class was not configured directly on the job and the
 * job's mapper class is to be used.
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent.
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
    Class<? extends Mapper> jobMapperClass = job.getMapperClass();

    Class<? extends Mapper> firstMapperClass = null;
    Map.Entry<Class, Class> firstMapperOutputTypes = null;

    for (Map.Entry<String, MapperInput> mapperInputEntry : context.getMapperInputs().entrySet()) {
        MapperInput mapperInput = mapperInputEntry.getValue();
        InputFormatProvider provider = mapperInput.getInputFormatProvider();
        Map<String, String> inputFormatConfiguration = new HashMap<>(provider.getInputFormatConfiguration());

        // default to what is configured on the job, if user didn't specify a mapper for an input
        Class<? extends Mapper> mapperClass = mapperInput.getMapper() == null ? jobMapperClass
                : mapperInput.getMapper();

        // check output key/value type consistency, except for the first input
        if (firstMapperClass == null) {
            firstMapperClass = mapperClass;
            firstMapperOutputTypes = getMapperOutputKeyValueTypes(mapperClass);
        } else {
            assertConsistentTypes(firstMapperClass, firstMapperOutputTypes, mapperClass);
        }

        // A bit hacky for stream.
        if (provider instanceof StreamInputFormatProvider) {
            // pass in mapperInput.getMapper() instead of mapperClass, because mapperClass defaults to the Identity Mapper
            setDecoderForStream((StreamInputFormatProvider) provider, job, inputFormatConfiguration,
                    mapperInput.getMapper());
        }

        MultipleInputs.addInput(job, mapperInputEntry.getKey(), provider.getInputFormatClassName(),
                inputFormatConfiguration, mapperClass);
    }

    // If firstMapperClass is null, the user is not going through our APIs to add input; leave the job's input format
    // to the user and simply return the output types of the mapper configured on the job.
    // If firstMapperClass == jobMapperClass, return null if the user didn't configure the mapper class explicitly.
    if (firstMapperClass == null || firstMapperClass == jobMapperClass) {
        return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
    }
    return resolveClass(firstMapperClass, Mapper.class);
}
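
The MultipleInputs used above appears to be CDAP's own variant, keyed by input name and carrying a per-input configuration map. For comparison, stock Hadoop's org.apache.hadoop.mapreduce.lib.input.MultipleInputs keys inputs by Path; a sketch (the paths and the LogMapper/EventMapper classes are hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Each input gets its own InputFormat and Mapper; as in the method above,
// the mappers' output key/value types must be consistent.
MultipleInputs.addInputPath(job, new Path("/data/logs"), TextInputFormat.class, LogMapper.class);
MultipleInputs.addInputPath(job, new Path("/data/events"), TextInputFormat.class, EventMapper.class);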

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

private void setDecoderForStream(StreamInputFormatProvider streamProvider, Job job,
        Map<String, String> inputFormatConfiguration, Class<? extends Mapper> mapperClass) {
    // For stream, we need to do two extra steps.
    // 1. stream usage registration since it only happens on client side.
    // 2. Infer the stream event decoder from Mapper/Reducer
    TypeToken<?> mapperTypeToken = mapperClass == null ? null : resolveClass(mapperClass, Mapper.class);
    Type inputValueType = getInputValueType(job.getConfiguration(), StreamEvent.class, mapperTypeToken);
    streamProvider.setDecoderType(inputFormatConfiguration, inputValueType);

    Id.Stream streamId = streamProvider.getStreamId();
    try {
        usageRegistry.register(context.getProgram().getId(), streamId);
        streamAdmin.addAccess(new Id.Run(context.getProgram().getId(), context.getRunId().getId()), streamId,
                AccessType.READ);
    } catch (Exception e) {
        LOG.warn("Failed to register usage {} -> {}", context.getProgram().getId(), streamId, e);
    }
}

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Sets the configurations used for outputs.
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> outputFormatProviders = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", outputFormatProviders.keySet());

    if (outputFormatProviders.isEmpty()) {
        // user is not going through our APIs to add output; leave the job's output format to user
        return;
    } else if (outputFormatProviders.size() == 1) {
        // If only one output is configured through the context, then set it as the root OutputFormat
        Map.Entry<String, OutputFormatProvider> next = outputFormatProviders.entrySet().iterator().next();
        OutputFormatProvider outputFormatProvider = next.getValue();
        ConfigurationUtil.setAll(outputFormatProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR,
                outputFormatProvider.getOutputFormatClassName());
        return;
    }
    // Multiple output formats are configured via the context. In this case we use an output format whose
    // RecordWriter doesn't support writing as the root output format, to disallow writing directly on the context.
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, UnsupportedOutputFormat.class.getName(),
            new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> entry : outputFormatProviders.entrySet()) {
        String outputName = entry.getKey();
        OutputFormatProvider outputFormatProvider = entry.getValue();

        String outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
        if (outputFormatClassName == null) {
            throw new IllegalArgumentException(
                    "Output '" + outputName + "' provided null as the output format");
        }

        Map<String, String> outputConfig = outputFormatProvider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName, job.getOutputKeyClass(),
                job.getOutputValueClass(), outputConfig);

    }
}
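
Similarly, the MultipleOutputs above is CDAP's variant, which accepts an output format class name plus a per-output configuration map. Stock Hadoop's org.apache.hadoop.mapreduce.lib.output.MultipleOutputs offers the same named-output idea without the config map; a sketch (the "audit" output name is hypothetical):

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Declare a named output on the job at setup time...
MultipleOutputs.addNamedOutput(job, "audit", TextOutputFormat.class, NullWritable.class, Text.class);
// ...then write to it from a task through a MultipleOutputs instance:
//   mos.write("audit", NullWritable.get(), new Text(record));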

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Creates a jar that contains everything Hadoop needs to run the MapReduce program.
 *
 * @return a new {@link File} containing the job jar
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);

    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar));
        output.close();
        return jobJar;
    }

    // Excludes libraries that are for sure not needed.
    // Hadoop - Available from the cluster
    // Spark - MR never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark")
                    || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);

    // We only need to trace the Input/OutputFormat classes due to MAPREDUCE-5957, so that those classes are included
    // in the job.jar and are available on the MR system classpath before our job classloader (ApplicationClassLoader)
    // takes over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader());
            classes.add(inputFormatClass);

            // If it is StreamInputFormat, also add the StreamEventCodec class as well.
            if (StreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = StreamInputFormat
                        .getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            LOG.info("InputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader());
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            LOG.info("OutputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
    }
    // End of MAPREDUCE-5957.

    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass();
        classes.add(hbaseTableUtilClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }

    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader());
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
    ClassLoaders.setContextClassLoader(oldClassLoader);

    LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Sets the output key and value classes in the job configuration by inspecting the {@link Mapper} and {@link Reducer}
 * if they are not set by the user.
 *
 * @param job the MapReduce job
 * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been
 *                        resolved from the job's mapper class.
 */
private void setOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) {
    Configuration conf = job.getConfiguration();

    // Try to get the type from reducer
    TypeToken<?> type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class);

    if (type == null) {
        // Map only job
        type = mapperTypeToken;
    }

    // If not able to detect type, nothing to set
    if (type == null || !(type.getType() instanceof ParameterizedType)) {
        return;
    }

    Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments();

    // Set it only if the user didn't set it in beforeSubmit
    // The key and value types are in the 3rd and 4th type parameters
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_KEY_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[2]).getRawType();
        LOG.debug("Set output key class to {}", cls);
        job.setOutputKeyClass(cls);
    }
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_VALUE_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[3]).getRawType();
        LOG.debug("Set output value class to {}", cls);
        job.setOutputValueClass(cls);
    }
}
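
The heavy lifting in the method above is done by Guava's TypeToken, which resolves the actual type arguments a concrete Reducer binds to Reducer&lt;KEYIN, VALUEIN, KEYOUT, VALUEOUT&gt;. A condensed sketch of that resolution step (MyReducer is hypothetical):

import com.google.common.reflect.TypeToken;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import org.apache.hadoop.mapreduce.Reducer;

// Walk up to the parameterized Reducer supertype and read its type arguments.
TypeToken<?> token = TypeToken.of(MyReducer.class).getSupertype(Reducer.class);
Type[] args = ((ParameterizedType) token.getType()).getActualTypeArguments();
Class<?> outputKeyClass = TypeToken.of(args[2]).getRawType();   // 3rd parameter: KEYOUT
Class<?> outputValueClass = TypeToken.of(args[3]).getRawType(); // 4th parameter: VALUEOUT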

From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License: Apache License

/**
 * Sets the map output key and value classes in the job configuration by inspecting the {@link Mapper}
 * if they are not set by the user.
 *
 * @param job the MapReduce job
 * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been
 *                        resolved from the job's mapper class.
 */
private void setMapOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) {
    Configuration conf = job.getConfiguration();

    TypeToken<?> type = mapperTypeToken;
    int keyIdx = 2;
    int valueIdx = 3;

    if (type == null) {
        // Reducer only job. Use the Reducer input types as the key/value classes.
        type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class);
        keyIdx = 0;
        valueIdx = 1;
    }

    // If not able to detect type, nothing to set.
    if (type == null || !(type.getType() instanceof ParameterizedType)) {
        return;
    }

    Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments();

    // Set it only if the user didn't set it in beforeSubmit
    // The key and value types are at keyIdx and valueIdx, as determined above
    if (!isProgrammaticConfig(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[keyIdx]).getRawType();
        LOG.debug("Set map output key class to {}", cls);
        job.setMapOutputKeyClass(cls);
    }
    if (!isProgrammaticConfig(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[valueIdx]).getRawType();
        LOG.debug("Set map output value class to {}", cls);
        job.setMapOutputValueClass(cls);
    }
}