Usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration
public Configuration getConfiguration()
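The method returns the live Configuration backing the job: properties set on it before submission become part of the submitted job. As a baseline before the project examples below, here is a minimal sketch of the call itself (the job name and property key are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        // getConfiguration() exposes the Configuration the job will be submitted with
        Configuration conf = job.getConfiguration();
        conf.set("example.custom.property", "value"); // illustrative key
        System.out.println(conf.get("example.custom.property"));
    }
}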
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License: Apache License

private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than
    // FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }
    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
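The example copies the task's Configuration into a fresh Job so that renaming the output does not leak into the caller's context (note that the new Job(Configuration) constructor is deprecated in Hadoop 2 in favor of Job.getInstance, which documents that it copies the passed-in Configuration). A minimal sketch of that copy-then-modify pattern, with an illustrative property key:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class ContextCopyExample {
    // Returns a context whose configuration can be modified without
    // affecting the original task context.
    static TaskAttemptContext withProperty(TaskAttemptContext context, String key, String value)
            throws IOException {
        // Job.getInstance copies the Configuration rather than wrapping it
        Job job = Job.getInstance(context.getConfiguration());
        job.getConfiguration().set(key, value);
        return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
    }
}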
From source file: co.cask.cdap.internal.app.runtime.batch.MapperWrapper.java
License: Apache License

/**
 * Wraps the mapper defined in the job with this {@link MapperWrapper} if it is defined.
 *
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getMapperClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String mapClass = conf.get(MRJobConfig.MAP_CLASS_ATTR, Mapper.class.getName());
    conf.set(MapperWrapper.ATTR_MAPPER_CLASS, mapClass);
    job.setMapperClass(MapperWrapper.class);
}
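The wrapper pattern above stashes the real mapper class name in the job configuration and swaps the job's mapper for the wrapper; at task runtime the wrapper reads the attribute back and delegates. A minimal sketch of that delegating side, assuming a hypothetical attribute name "wrapped.mapper.class" (CDAP's actual ATTR_MAPPER_CLASS constant and wrapper logic differ):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

public class DelegatingMapper extends Mapper<Object, Object, Object, Object> {
    @Override
    @SuppressWarnings("unchecked")
    public void run(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Read back the class name that wrap() stored in the configuration
        String delegateName = conf.get("wrapped.mapper.class", Mapper.class.getName());
        try {
            Mapper<Object, Object, Object, Object> delegate =
                    (Mapper<Object, Object, Object, Object>) ReflectionUtils
                            .newInstance(conf.getClassByName(delegateName), conf);
            // Hand the whole task over to the real mapper
            delegate.run(context);
        } catch (ClassNotFoundException e) {
            throw new IOException("Cannot load wrapped mapper " + delegateName, e);
        }
    }
}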
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);
    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(),
                context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override the user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create a new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the
            // proper ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // Set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // Replace the user's Mapper & Reducer with our wrappers in the job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // Package the job jar, which includes CDAP classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce container processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar.
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and
            // launch the actual MapReduce AM/Task from that.
            // We explicitly localize the mr-framework, but do not use it on the classpath.
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start a long-running tx to be used by the mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember the tx, so that we can re-use it in the mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submit() returns immediately. Shouldn't need to set the context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}
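Much of startUp() revolves around localizing resources (the plugin archive, program jar, logback jar, launcher jar) by adding them to the job's distributed cache, with names recorded in the job Configuration. A minimal hedged sketch of that localization pattern on its own, with illustrative HDFS paths:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CacheFileExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "localization-example");
        // Files are shipped as-is to every container; archives are unpacked there
        job.addCacheFile(new URI("hdfs:///apps/shared/lookup.txt"));     // illustrative path
        job.addCacheArchive(new URI("hdfs:///apps/shared/plugins.zip")); // illustrative path
        // Both calls ultimately write into the job's Configuration
        // (keys per MRJobConfig.CACHE_FILES / CACHE_ARCHIVES)
        System.out.println(job.getConfiguration().get("mapreduce.job.cache.files"));
    }
}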
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Creates a MapReduce {@link Job} instance.
 *
 * @param hadoopTmpDir directory for the "hadoop.tmp.dir" configuration
 */
private Job createJob(File hadoopTmpDir) throws IOException {
    Job job = Job.getInstance(new Configuration(hConf));
    Configuration jobConf = job.getConfiguration();

    if (MapReduceTaskContextProvider.isLocal(jobConf)) {
        // Set the MR framework local directories inside the given tmp directory.
        // Setting "hadoop.tmp.dir" here has no effect because the Explore Service needs to set "hadoop.tmp.dir"
        // as a system property for Hive to work in local mode, and the variable substitution of hadoop conf
        // gives system properties the highest precedence.
        jobConf.set("mapreduce.cluster.local.dir", new File(hadoopTmpDir, "local").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.system.dir", new File(hadoopTmpDir, "system").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.staging.root.dir", new File(hadoopTmpDir, "staging").getAbsolutePath());
        jobConf.set("mapreduce.cluster.temp.dir", new File(hadoopTmpDir, "temp").getAbsolutePath());
    }

    if (UserGroupInformation.isSecurityEnabled()) {
        // If this runs in a secure cluster, this program runner is running in a yarn container, hence not able
        // to get authenticated with the history server.
        jobConf.unset("mapreduce.jobhistory.address");
        jobConf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        LOG.info("Running in secure mode; adding all user credentials: {}", credentials.getAllTokens());
        job.getCredentials().addAll(credentials);
    }
    return job;
}
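The secure-mode branch propagates the submitter's delegation tokens into the job so that tasks can authenticate to HDFS and other services. A minimal sketch of that credential hand-off in isolation (no CDAP specifics assumed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;

public class SecureJobExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "secure-example");
        if (UserGroupInformation.isSecurityEnabled()) {
            // Copy the current user's tokens and secrets into the job's credentials
            Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
            job.getCredentials().addAll(credentials);
        }
    }
}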
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Sets the configurations used for inputs.
 * Multiple mappers could be defined, so we first check that their output types are consistent.
 *
 * @return the TypeToken for one of the mappers (it doesn't matter which one, since we check that all of their
 *         output key/value types are consistent). Returns null if the mapper class was not configured directly
 *         on the job and the job's mapper class is to be used.
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent.
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
    Class<? extends Mapper> jobMapperClass = job.getMapperClass();

    Class<? extends Mapper> firstMapperClass = null;
    Map.Entry<Class, Class> firstMapperOutputTypes = null;

    for (Map.Entry<String, MapperInput> mapperInputEntry : context.getMapperInputs().entrySet()) {
        MapperInput mapperInput = mapperInputEntry.getValue();
        InputFormatProvider provider = mapperInput.getInputFormatProvider();
        Map<String, String> inputFormatConfiguration = new HashMap<>(provider.getInputFormatConfiguration());

        // Default to what is configured on the job, if the user didn't specify a mapper for an input
        Class<? extends Mapper> mapperClass =
                mapperInput.getMapper() == null ? jobMapperClass : mapperInput.getMapper();

        // Check output key/value type consistency, except for the first input
        if (firstMapperClass == null) {
            firstMapperClass = mapperClass;
            firstMapperOutputTypes = getMapperOutputKeyValueTypes(mapperClass);
        } else {
            assertConsistentTypes(firstMapperClass, firstMapperOutputTypes, mapperClass);
        }

        // A bit hacky for stream.
        if (provider instanceof StreamInputFormatProvider) {
            // Pass in mapperInput.getMapper() instead of mapperClass, because mapperClass defaults to the
            // identity Mapper
            setDecoderForStream((StreamInputFormatProvider) provider, job, inputFormatConfiguration,
                    mapperInput.getMapper());
        }

        MultipleInputs.addInput(job, mapperInputEntry.getKey(), provider.getInputFormatClassName(),
                inputFormatConfiguration, mapperClass);
    }

    // If firstMapperClass is null, then the user is not going through our APIs to add input; leave the job's
    // input format to the user and simply return the mapper output types of the mapper configured on the job.
    // If firstMapperClass == jobMapperClass, return null if the user didn't configure the mapper class explicitly.
    if (firstMapperClass == null || firstMapperClass == jobMapperClass) {
        return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
    }
    return resolveClass(firstMapperClass, Mapper.class);
}
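The MultipleInputs used here is CDAP's class-name-based variant; stock Hadoop ships an analogous org.apache.hadoop.mapreduce.lib.input.MultipleInputs keyed by Path. A minimal hedged sketch with the stock API, using two mappers whose output types agree, as the consistency check above requires (paths are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultiInputExample {
    // Both mappers emit Text/IntWritable, so their output types are consistent
    static class MapperA extends Mapper<LongWritable, Text, Text, IntWritable> { }
    static class MapperB extends Mapper<LongWritable, Text, Text, IntWritable> { }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multi-input-example");
        MultipleInputs.addInputPath(job, new Path("/data/a"), TextInputFormat.class, MapperA.class);
        MultipleInputs.addInputPath(job, new Path("/data/b"), TextInputFormat.class, MapperB.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
    }
}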
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

private void setDecoderForStream(StreamInputFormatProvider streamProvider, Job job,
        Map<String, String> inputFormatConfiguration, Class<? extends Mapper> mapperClass) {
    // For stream, we need to do two extra steps.
    // 1. Stream usage registration, since it only happens on the client side.
    // 2. Infer the stream event decoder from the Mapper/Reducer.
    TypeToken<?> mapperTypeToken = mapperClass == null ? null : resolveClass(mapperClass, Mapper.class);
    Type inputValueType = getInputValueType(job.getConfiguration(), StreamEvent.class, mapperTypeToken);
    streamProvider.setDecoderType(inputFormatConfiguration, inputValueType);

    Id.Stream streamId = streamProvider.getStreamId();
    try {
        usageRegistry.register(context.getProgram().getId(), streamId);
        streamAdmin.addAccess(new Id.Run(context.getProgram().getId(), context.getRunId().getId()),
                streamId, AccessType.READ);
    } catch (Exception e) {
        LOG.warn("Failed to register usage {} -> {}", context.getProgram().getId(), streamId, e);
    }
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Sets the configurations used for outputs.
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> outputFormatProviders = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", outputFormatProviders.keySet());

    if (outputFormatProviders.isEmpty()) {
        // The user is not going through our APIs to add output; leave the job's output format to the user
        return;
    } else if (outputFormatProviders.size() == 1) {
        // If only one output is configured through the context, then set it as the root OutputFormat
        Map.Entry<String, OutputFormatProvider> next = outputFormatProviders.entrySet().iterator().next();
        OutputFormatProvider outputFormatProvider = next.getValue();
        ConfigurationUtil.setAll(outputFormatProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
        return;
    }
    // Multiple output formats are configured via the context. In this case, use an output format whose
    // RecordWriter doesn't support writing as the root output format, to disallow writing directly on the context.
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, UnsupportedOutputFormat.class.getName(),
            new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> entry : outputFormatProviders.entrySet()) {
        String outputName = entry.getKey();
        OutputFormatProvider outputFormatProvider = entry.getValue();

        String outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
        if (outputFormatClassName == null) {
            throw new IllegalArgumentException("Output '" + outputName + "' provided null as the output format");
        }

        Map<String, String> outputConfig = outputFormatProvider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName,
                job.getOutputKeyClass(), job.getOutputValueClass(), outputConfig);
    }
}
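The MultipleOutputs above is likewise a CDAP variant that accepts a class name and per-output configuration; the stock org.apache.hadoop.mapreduce.lib.output.MultipleOutputs covers the common case. A minimal hedged sketch with the stock API (the output names "text" and "seq" are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MultiOutputExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multi-output-example");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Each named output carries its own OutputFormat and key/value classes
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, IntWritable.class);
        MultipleOutputs.addNamedOutput(job, "seq", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    }
}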
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Creates a jar that contains everything needed by Hadoop for running the MapReduce program.
 *
 * @return a new {@link File} containing the job jar
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);

    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar));
        output.close();
        return jobJar;
    }

    // Exclude libraries that are for sure not needed.
    // Hadoop - available from the cluster
    // Spark - MR never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark")
                    || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);

    // We only need to trace the Input/OutputFormat classes due to MAPREDUCE-5957, so that those classes are
    // included in the job.jar and available in the MR system classpath before our job classloader
    // (ApplicationClassLoader) takes over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader());
            classes.add(inputFormatClass);

            // If it is StreamInputFormat, also add the StreamEventCodec class as well.
            if (StreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType =
                        StreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            LOG.info("InputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader());
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            LOG.info("OutputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
    }
    // End of MAPREDUCE-5957.

    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass();
        classes.add(hbaseTableUtilClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }

    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader());
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
    ClassLoaders.setContextClassLoader(oldClassLoader);

    LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
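Once a job jar has been built, pointing the job at it is a one-liner, and the path travels in the job's Configuration. A minimal sketch (the jar path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobJarExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "job-jar-example");
        job.setJar("/tmp/build/job.jar"); // illustrative path to a pre-built jar
        // Alternative: derive the jar from a class already on the classpath
        // job.setJarByClass(JobJarExample.class);
        System.out.println(job.getConfiguration().get("mapreduce.job.jar"));
    }
}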
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Sets the output key and value classes in the job configuration by inspecting the {@link Mapper} and
 * {@link Reducer} if they are not set by the user.
 *
 * @param job the MapReduce job
 * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been
 *                        resolved from the job's mapper class.
 */
private void setOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) {
    Configuration conf = job.getConfiguration();

    // Try to get the type from the reducer
    TypeToken<?> type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class);
    if (type == null) {
        // Map-only job
        type = mapperTypeToken;
    }
    // If not able to detect the type, there is nothing to set
    if (type == null || !(type.getType() instanceof ParameterizedType)) {
        return;
    }
    Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments();

    // Set it only if the user didn't set it in beforeSubmit.
    // The output key and value types are the 3rd and 4th type parameters.
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_KEY_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[2]).getRawType();
        LOG.debug("Set output key class to {}", cls);
        job.setOutputKeyClass(cls);
    }
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_VALUE_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[3]).getRawType();
        LOG.debug("Set output value class to {}", cls);
        job.setOutputValueClass(cls);
    }
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License

/**
 * Sets the map output key and value classes in the job configuration by inspecting the {@link Mapper}
 * if they are not set by the user.
 *
 * @param job the MapReduce job
 * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been
 *                        resolved from the job's mapper class.
 */
private void setMapOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) {
    Configuration conf = job.getConfiguration();

    TypeToken<?> type = mapperTypeToken;
    int keyIdx = 2;
    int valueIdx = 3;

    if (type == null) {
        // Reducer-only job. Use the Reducer input types as the key/value classes.
        type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class);
        keyIdx = 0;
        valueIdx = 1;
    }

    // If not able to detect the type, there is nothing to set.
    if (type == null || !(type.getType() instanceof ParameterizedType)) {
        return;
    }

    Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments();

    // Set it only if the user didn't set it in beforeSubmit.
    // For a mapper the output key and value types are the 3rd and 4th type parameters; for a reducer-only job
    // they are the 1st and 2nd, hence keyIdx and valueIdx.
    if (!isProgrammaticConfig(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[keyIdx]).getRawType();
        LOG.debug("Set map output key class to {}", cls);
        job.setMapOutputKeyClass(cls);
    }
    if (!isProgrammaticConfig(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[valueIdx]).getRawType();
        LOG.debug("Set map output value class to {}", cls);
        job.setMapOutputValueClass(cls);
    }
}
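Both of the last two examples hinge on resolving a Mapper or Reducer subclass against its generic supertype and reading off the actual type arguments. A self-contained sketch of that mechanism with Guava's TypeToken (the mapper class is illustrative):

import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import com.google.common.reflect.TypeToken;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TypeArgExample {
    // KEYIN, VALUEIN, KEYOUT, VALUEOUT = LongWritable, Text, Text, IntWritable
    static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> { }

    public static void main(String[] args) {
        // Resolve WordMapper against the generic Mapper supertype
        Type mapperType = TypeToken.of(WordMapper.class).getSupertype(Mapper.class).getType();
        Type[] typeArgs = ((ParameterizedType) mapperType).getActualTypeArguments();
        // Indices 2 and 3 are the output key/value types, as in the methods above
        System.out.println("map output key:   " + TypeToken.of(typeArgs[2]).getRawType());
        System.out.println("map output value: " + TypeToken.of(typeArgs[3]).getRawType());
    }
}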