List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
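getConfiguration() returns the live, mutable Configuration that backs the Job, so any property set through it is carried along when the job is submitted and is visible to its tasks at runtime. Before the real-world examples below, here is a minimal sketch of the call in isolation; the property key "example.custom.key" is just an illustrative placeholder, not a real Hadoop setting.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws IOException {
        // Job.getInstance() creates a Job backed by a fresh Configuration
        Job job = Job.getInstance();

        // getConfiguration() hands back that live Configuration; changes
        // made here travel with the job when it is submitted
        Configuration conf = job.getConfiguration();
        conf.set("example.custom.key", "value"); // placeholder key, for illustration only

        System.out.println(conf.get("example.custom.key"));
    }
}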
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Localizes resources requested by users in the MapReduce Program's beforeSubmit phase.
 * In Local mode, also copies resources to a temporary directory.
 *
 * @param job the {@link Job} for this MapReduce program
 * @param targetDir in local mode, a temporary directory to copy the resources to
 * @return a {@link Map} of resource name to resource path. The resource path will be absolute in local mode,
 *         while it will just contain the file name in distributed mode.
 */
private Map<String, String> localizeUserResources(Job job, File targetDir) throws IOException {
    Map<String, String> localizedResources = new HashMap<>();
    Map<String, LocalizeResource> resourcesToLocalize = context.getResourcesToLocalize();
    for (Map.Entry<String, LocalizeResource> entry : resourcesToLocalize.entrySet()) {
        String localizedFilePath;
        String name = entry.getKey();
        Configuration mapredConf = job.getConfiguration();
        if (MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // in local mode, also localize resources into a temporary directory
            localizedFilePath = LocalizationUtils.localizeResource(entry.getKey(), entry.getValue(), targetDir)
                    .getAbsolutePath();
        } else {
            URI uri = entry.getValue().getURI();
            // in distributed mode, use the MapReduce Job object to localize resources
            URI actualURI;
            try {
                actualURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), name);
            } catch (URISyntaxException e) {
                // Most of the URI is constructed from the passed URI, so ideally this should not happen.
                // If it does, there is nothing clients can do to recover, so a checked exception is not propagated.
                throw Throwables.propagate(e);
            }
            if (entry.getValue().isArchive()) {
                job.addCacheArchive(actualURI);
            } else {
                job.addCacheFile(actualURI);
            }
            localizedFilePath = name;
        }
        localizedResources.put(name, localizedFilePath);
    }
    return localizedResources;
}
From source file:co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java
License:Apache License
/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
 *
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getReducerClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
    if (reducerClass != null) {
        conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
        job.setReducerClass(ReducerWrapper.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.WrapperUtil.java
License:Apache License
static boolean setIfDefined(Job job, String srcKey, String destinationKey) {
    // NOTE: we don't use job.getXClass or conf.getClass as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String srcVal = conf.get(srcKey);
    if (srcVal != null) {
        conf.set(destinationKey, srcVal);
        return true;
    }
    return false;
}
From source file:co.cask.cdap.longrunning.datacleansing.DataCleansingMapReduce.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    partitionCommitter = PartitionBatchInput.setInput(context, DataCleansingApp.RAW_RECORDS,
            new KVTableStatePersistor(DataCleansingApp.CONSUMING_STATE, "state.key"));

    // Each run writes its output to a partition keyed by the time passed in the runtime arguments
    Long timeKey = Long.valueOf(context.getRuntimeArguments().get(OUTPUT_PARTITION_KEY));
    PartitionKey outputKey = PartitionKey.builder().addLongField("time", timeKey).build();
    Map<String, String> metadataToAssign = ImmutableMap.of("source.program", "DataCleansingMapReduce");

    // set up two outputs - one for invalid records and one for valid records
    Map<String, String> invalidRecordsArgs = new HashMap<>();
    PartitionedFileSetArguments.setOutputPartitionKey(invalidRecordsArgs, outputKey);
    PartitionedFileSetArguments.setOutputPartitionMetadata(invalidRecordsArgs, metadataToAssign);
    context.addOutput(DataCleansingApp.INVALID_RECORDS, invalidRecordsArgs);

    Map<String, String> cleanRecordsArgs = new HashMap<>();
    PartitionedFileSetArguments.setDynamicPartitioner(cleanRecordsArgs, TimeAndZipPartitioner.class);
    PartitionedFileSetArguments.setOutputPartitionMetadata(cleanRecordsArgs, metadataToAssign);
    context.addOutput(DataCleansingApp.CLEAN_RECORDS, cleanRecordsArgs);

    Job job = context.getHadoopJob();
    job.setMapperClass(SchemaMatchingFilter.class);
    job.setNumReduceTasks(0);

    // simply propagate the schema (if any) to be used by the mapper
    String schemaJson = context.getRuntimeArguments().get(SCHEMA_KEY);
    if (schemaJson != null) {
        job.getConfiguration().set(SCHEMA_KEY, schemaJson);
    }
}
From source file:co.cask.cdap.template.etl.batch.sink.DBSink.java
License:Apache License
@Override
public void prepareRun(BatchSinkContext context) {
    LOG.debug("tableName = {}; pluginType = {}; pluginName = {}; connectionString = {}; columns = {}",
            dbSinkConfig.tableName, dbSinkConfig.jdbcPluginType, dbSinkConfig.jdbcPluginName,
            dbSinkConfig.connectionString, dbSinkConfig.columns);

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // Load the plugin class to make sure it is available.
    Class<? extends Driver> driverClass = context.loadPluginClass(getJDBCPluginId());
    if (dbSinkConfig.user == null && dbSinkConfig.password == null) {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString);
    } else {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString,
                dbSinkConfig.user, dbSinkConfig.password);
    }
    List<String> fields = Lists.newArrayList(Splitter.on(",").omitEmptyStrings().split(dbSinkConfig.columns));
    try {
        ETLDBOutputFormat.setOutput(job, dbSinkConfig.tableName, fields.toArray(new String[fields.size()]));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    job.setOutputFormatClass(ETLDBOutputFormat.class);
}
From source file:co.cask.cdap.template.etl.batch.source.DBSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) {
    LOG.debug("pluginType = {}; pluginName = {}; connectionString = {}; importQuery = {}; countQuery = {}",
            dbSourceConfig.jdbcPluginType, dbSourceConfig.jdbcPluginName, dbSourceConfig.connectionString,
            dbSourceConfig.importQuery, dbSourceConfig.countQuery);

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // Load the plugin class to make sure it is available.
    Class<? extends Driver> driverClass = context.loadPluginClass(getJDBCPluginId());
    if (dbSourceConfig.user == null && dbSourceConfig.password == null) {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSourceConfig.connectionString);
    } else {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSourceConfig.connectionString,
                dbSourceConfig.user, dbSourceConfig.password);
    }
    ETLDBInputFormat.setInput(job, DBRecord.class, dbSourceConfig.importQuery, dbSourceConfig.countQuery);
    job.setInputFormatClass(ETLDBInputFormat.class);
}
From source file:co.cask.cdap.template.etl.batch.source.FileBatchSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
    // SimpleDateFormat needs to be local because it is not thread-safe
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

    // calculate the date one hour ago, rounded down to the nearest hour
    prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
    Calendar cal = Calendar.getInstance();
    cal.setTime(prevHour);
    cal.set(Calendar.MINUTE, 0);
    cal.set(Calendar.SECOND, 0);
    cal.set(Calendar.MILLISECOND, 0);
    prevHour = cal.getTime();

    Job job = context.getHadoopJob();
    Configuration conf = job.getConfiguration();
    if (config.fileSystemProperties != null) {
        Map<String, String> properties = GSON.fromJson(config.fileSystemProperties, MAP_STRING_STRING_TYPE);
        for (Map.Entry<String, String> entry : properties.entrySet()) {
            conf.set(entry.getKey(), entry.getValue());
        }
    }
    if (config.fileRegex != null) {
        conf.set(INPUT_REGEX_CONFIG, config.fileRegex);
    }
    conf.set(INPUT_NAME_CONFIG, config.path);

    if (config.timeTable != null) {
        table = context.getDataset(config.timeTable);
        datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
        if (datesToRead == null) {
            List<Date> firstRun = Lists.newArrayList(new Date(0));
            datesToRead = GSON.toJson(firstRun, ARRAYLIST_DATE_TYPE);
        }
        List<Date> attempted = Lists.newArrayList(prevHour);
        String updatedDatesToRead = GSON.toJson(attempted, ARRAYLIST_DATE_TYPE);
        if (!updatedDatesToRead.equals(datesToRead)) {
            table.write(LAST_TIME_READ, updatedDatesToRead);
        }
        conf.set(LAST_TIME_READ, datesToRead);
    }
    conf.set(CUTOFF_READ_TIME, dateFormat.format(prevHour));

    if (!Strings.isNullOrEmpty(config.inputFormatClass)) {
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        Class<? extends FileInputFormat> classType = (Class<? extends FileInputFormat>) classLoader
                .loadClass(config.inputFormatClass);
        job.setInputFormatClass(classType);
    } else {
        job.setInputFormatClass(CombineTextInputFormat.class);
    }
    FileInputFormat.setInputPathFilter(job, BatchFileFilter.class);
    FileInputFormat.addInputPath(job, new Path(config.path));

    long maxSplitSize;
    try {
        maxSplitSize = Long.parseLong(config.maxSplitSize);
    } catch (NumberFormatException e) {
        maxSplitSize = DEFAULT_SPLIT_SIZE;
    }
    CombineTextInputFormat.setMaxInputSplitSize(job, maxSplitSize);
}
From source file:co.cask.hydrator.common.batch.JobUtils.java
License:Apache License
/**
 * Creates a new instance of {@link Job}. Note that the job created is not meant for actual MR
 * submission. It's just for setting up configurations.
 */
public static Job createInstance() throws IOException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.clear();

    if (UserGroupInformation.isSecurityEnabled()) {
        // If running in a secure cluster, this program runner runs in a YARN container and hence
        // cannot get authenticated with the job history server.
        conf.unset("mapreduce.jobhistory.address");
        conf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        job.getCredentials().addAll(credentials);
    }

    return job;
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();

    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get the regex pattern for file name filtering
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.batch.CopybookInputFormat.java
License:Apache License
public static void setCopybookInputformatCblContents(Job job, String copybookContents) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_CBL_CONTENTS, copybookContents);
}