List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
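getConfiguration() returns the live, mutable Configuration that backs the Job, so any property set through it is carried along when the job is submitted and is visible to its tasks at runtime. Before the real-world examples below, here is a minimal sketch of the call in isolation; the property key "example.custom.key" is just an illustrative placeholder, not a real Hadoop setting.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws IOException {
        // Job.getInstance() creates a Job backed by a fresh Configuration
        Job job = Job.getInstance();

        // getConfiguration() hands back that live Configuration; changes
        // made here travel with the job when it is submitted
        Configuration conf = job.getConfiguration();
        conf.set("example.custom.key", "value"); // placeholder key, for illustration only

        System.out.println(conf.get("example.custom.key"));
    }
}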
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Localizes resources requested by users in the MapReduce Program's beforeSubmit phase.
 * In Local mode, also copies resources to a temporary directory.
 *
 * @param job the {@link Job} for this MapReduce program
 * @param targetDir in local mode, a temporary directory to copy the resources to
 * @return a {@link Map} of resource name to resource path. The resource path will be absolute in local mode,
 *         while it will just contain the file name in distributed mode.
 */
private Map<String, String> localizeUserResources(Job job, File targetDir) throws IOException {
    Map<String, String> localizedResources = new HashMap<>();
    Map<String, LocalizeResource> resourcesToLocalize = context.getResourcesToLocalize();
    for (Map.Entry<String, LocalizeResource> entry : resourcesToLocalize.entrySet()) {
        String localizedFilePath;
        String name = entry.getKey();
        Configuration mapredConf = job.getConfiguration();
        if (MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // in local mode, also localize resources into a temporary directory
            localizedFilePath = LocalizationUtils.localizeResource(entry.getKey(), entry.getValue(), targetDir)
                    .getAbsolutePath();
        } else {
            URI uri = entry.getValue().getURI();
            // in distributed mode, use the MapReduce Job object to localize resources
            URI actualURI;
            try {
                actualURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), name);
            } catch (URISyntaxException e) {
                // Most of the URI is constructed from the passed URI, so ideally this should not happen.
                // If it does, there is nothing clients can do to recover, so a checked exception is not propagated.
                throw Throwables.propagate(e);
            }
            if (entry.getValue().isArchive()) {
                job.addCacheArchive(actualURI);
            } else {
                job.addCacheFile(actualURI);
            }
            localizedFilePath = name;
        }
        localizedResources.put(name, localizedFilePath);
    }
    return localizedResources;
}
From source file:co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java
License:Apache License
/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
 *
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getReducerClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
    if (reducerClass != null) {
        conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
        job.setReducerClass(ReducerWrapper.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.WrapperUtil.java
License:Apache License
static boolean setIfDefined(Job job, String srcKey, String destinationKey) {
    // NOTE: we don't use job.getXClass or conf.getClass as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String srcVal = conf.get(srcKey);
    if (srcVal != null) {
        conf.set(destinationKey, srcVal);
        return true;
    }
    return false;
}
From source file:co.cask.cdap.longrunning.datacleansing.DataCleansingMapReduce.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    partitionCommitter = PartitionBatchInput.setInput(context, DataCleansingApp.RAW_RECORDS,
            new KVTableStatePersistor(DataCleansingApp.CONSUMING_STATE, "state.key"));

    // Each run writes its output to a partition keyed by the time passed in the runtime arguments
    Long timeKey = Long.valueOf(context.getRuntimeArguments().get(OUTPUT_PARTITION_KEY));
    PartitionKey outputKey = PartitionKey.builder().addLongField("time", timeKey).build();
    Map<String, String> metadataToAssign = ImmutableMap.of("source.program", "DataCleansingMapReduce");

    // set up two outputs - one for invalid records and one for valid records
    Map<String, String> invalidRecordsArgs = new HashMap<>();
    PartitionedFileSetArguments.setOutputPartitionKey(invalidRecordsArgs, outputKey);
    PartitionedFileSetArguments.setOutputPartitionMetadata(invalidRecordsArgs, metadataToAssign);
    context.addOutput(DataCleansingApp.INVALID_RECORDS, invalidRecordsArgs);

    Map<String, String> cleanRecordsArgs = new HashMap<>();
    PartitionedFileSetArguments.setDynamicPartitioner(cleanRecordsArgs, TimeAndZipPartitioner.class);
    PartitionedFileSetArguments.setOutputPartitionMetadata(cleanRecordsArgs, metadataToAssign);
    context.addOutput(DataCleansingApp.CLEAN_RECORDS, cleanRecordsArgs);

    Job job = context.getHadoopJob();
    job.setMapperClass(SchemaMatchingFilter.class);
    job.setNumReduceTasks(0);

    // simply propagate the schema (if any) to be used by the mapper
    String schemaJson = context.getRuntimeArguments().get(SCHEMA_KEY);
    if (schemaJson != null) {
        job.getConfiguration().set(SCHEMA_KEY, schemaJson);
    }
}
From source file:co.cask.cdap.template.etl.batch.sink.DBSink.java
License:Apache License
@Override
public void prepareRun(BatchSinkContext context) {
    LOG.debug("tableName = {}; pluginType = {}; pluginName = {}; connectionString = {}; columns = {}",
            dbSinkConfig.tableName, dbSinkConfig.jdbcPluginType, dbSinkConfig.jdbcPluginName,
            dbSinkConfig.connectionString, dbSinkConfig.columns);

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // Load the plugin class to make sure it is available.
    Class<? extends Driver> driverClass = context.loadPluginClass(getJDBCPluginId());
    if (dbSinkConfig.user == null && dbSinkConfig.password == null) {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString);
    } else {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString,
                dbSinkConfig.user, dbSinkConfig.password);
    }
    List<String> fields = Lists.newArrayList(Splitter.on(",").omitEmptyStrings().split(dbSinkConfig.columns));
    try {
        ETLDBOutputFormat.setOutput(job, dbSinkConfig.tableName, fields.toArray(new String[fields.size()]));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    job.setOutputFormatClass(ETLDBOutputFormat.class);
}
From source file:co.cask.cdap.template.etl.batch.source.DBSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) {
    LOG.debug("pluginType = {}; pluginName = {}; connectionString = {}; importQuery = {}; countQuery = {}",
            dbSourceConfig.jdbcPluginType, dbSourceConfig.jdbcPluginName, dbSourceConfig.connectionString,
            dbSourceConfig.importQuery, dbSourceConfig.countQuery);

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // Load the plugin class to make sure it is available.
    Class<? extends Driver> driverClass = context.loadPluginClass(getJDBCPluginId());
    if (dbSourceConfig.user == null && dbSourceConfig.password == null) {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSourceConfig.connectionString);
    } else {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSourceConfig.connectionString,
                dbSourceConfig.user, dbSourceConfig.password);
    }
    ETLDBInputFormat.setInput(job, DBRecord.class, dbSourceConfig.importQuery, dbSourceConfig.countQuery);
    job.setInputFormatClass(ETLDBInputFormat.class);
}
From source file:co.cask.cdap.template.etl.batch.source.FileBatchSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
    // SimpleDateFormat needs to be local because it is not thread-safe
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

    // calculate the date one hour ago, rounded down to the nearest hour
    prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
    Calendar cal = Calendar.getInstance();
    cal.setTime(prevHour);
    cal.set(Calendar.MINUTE, 0);
    cal.set(Calendar.SECOND, 0);
    cal.set(Calendar.MILLISECOND, 0);
    prevHour = cal.getTime();

    Job job = context.getHadoopJob();
    Configuration conf = job.getConfiguration();
    if (config.fileSystemProperties != null) {
        Map<String, String> properties = GSON.fromJson(config.fileSystemProperties, MAP_STRING_STRING_TYPE);
        for (Map.Entry<String, String> entry : properties.entrySet()) {
            conf.set(entry.getKey(), entry.getValue());
        }
    }
    if (config.fileRegex != null) {
        conf.set(INPUT_REGEX_CONFIG, config.fileRegex);
    }
    conf.set(INPUT_NAME_CONFIG, config.path);

    if (config.timeTable != null) {
        table = context.getDataset(config.timeTable);
        datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
        if (datesToRead == null) {
            List<Date> firstRun = Lists.newArrayList(new Date(0));
            datesToRead = GSON.toJson(firstRun, ARRAYLIST_DATE_TYPE);
        }
        List<Date> attempted = Lists.newArrayList(prevHour);
        String updatedDatesToRead = GSON.toJson(attempted, ARRAYLIST_DATE_TYPE);
        if (!updatedDatesToRead.equals(datesToRead)) {
            table.write(LAST_TIME_READ, updatedDatesToRead);
        }
        conf.set(LAST_TIME_READ, datesToRead);
    }
    conf.set(CUTOFF_READ_TIME, dateFormat.format(prevHour));

    if (!Strings.isNullOrEmpty(config.inputFormatClass)) {
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        Class<? extends FileInputFormat> classType = (Class<? extends FileInputFormat>) classLoader
                .loadClass(config.inputFormatClass);
        job.setInputFormatClass(classType);
    } else {
        job.setInputFormatClass(CombineTextInputFormat.class);
    }
    FileInputFormat.setInputPathFilter(job, BatchFileFilter.class);
    FileInputFormat.addInputPath(job, new Path(config.path));

    long maxSplitSize;
    try {
        maxSplitSize = Long.parseLong(config.maxSplitSize);
    } catch (NumberFormatException e) {
        maxSplitSize = DEFAULT_SPLIT_SIZE;
    }
    CombineTextInputFormat.setMaxInputSplitSize(job, maxSplitSize);
}
From source file:co.cask.hydrator.common.batch.JobUtils.java
License:Apache License
/**
 * Creates a new instance of {@link Job}. Note that the job created is not meant for actual MR
 * submission. It's just for setting up configurations.
 */
public static Job createInstance() throws IOException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.clear();

    if (UserGroupInformation.isSecurityEnabled()) {
        // If running in a secure cluster, this program runner runs in a YARN container and hence
        // cannot get authenticated with the job history server.
        conf.unset("mapreduce.jobhistory.address");
        conf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        job.getCredentials().addAll(credentials);
    }

    return job;
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();

    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get the regex pattern for file name filtering
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.batch.CopybookInputFormat.java
License:Apache License
public static void setCopybookInputformatCblContents(Job job, String copybookContents) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_CBL_CONTENTS, copybookContents);
}