Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
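
For reference, a minimal sketch (not taken from the examples below) of the typical pattern: create a Job, obtain its Configuration via getConfiguration(), and set job-scoped properties before submission. The property name used here is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        // getConfiguration() returns the job's own copy of the configuration;
        // changes made here apply only to this job.
        Configuration conf = job.getConfiguration();
        conf.set("example.custom.key", "value"); // hypothetical property name
        System.out.println(conf.get("example.custom.key"));
    }
}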

Usage

From source file:co.cask.hydrator.plugin.batchSource.KafkaBatchSource.java

License:Apache License

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    table = context.getDataset(config.getTableName());
    kafkaRequests = KafkaInputFormat.saveKafkaRequests(conf, config.getTopic(), config.getBrokerMap(),
            config.getPartitions(), config.getInitialPartitionOffsets(), table);
    context.setInput(
            Input.of(config.referenceName, new SourceInputFormatProvider(KafkaInputFormat.class, conf)));
}

From source file:co.cask.hydrator.plugin.sink.HBaseSink.java

License:Apache License

@Override
public void prepareRun(BatchSinkContext context) throws Exception {
    Job job;
    ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader();
    // Switch the context classloader to plugin class' classloader (PluginClassLoader) so that
    // when Job/Configuration is created, it uses PluginClassLoader to load resources (hbase-default.xml)
    // which is present in the plugin jar and is not visible in the CombineClassLoader (which is what oldClassLoader
    // points to).
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
        job = JobUtils.createInstance();
    } finally {
        // Switch back to the original
        Thread.currentThread().setContextClassLoader(oldClassLoader);
    }

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.addHbaseResources(conf);

    context.addOutput(Output.of(config.referenceName, new HBaseOutputFormatProvider(config, conf))
            .alias(config.columnFamily));
}

From source file:co.nubetech.apache.hadoop.DataDrivenDBInputFormat.java

License:Apache License

/**
 * setInput() takes a custom query and a separate "bounding query" to use
 * instead of the custom "count query" used by DBInputFormat.
 */
public static void setInput(Job job, Class<? extends DBWritable> inputClass, String inputQuery,
        String inputBoundingQuery) {
    DBInputFormat.setInput(job, inputClass, inputQuery, "");
    job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery);
    job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
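
A hedged usage sketch for the overload above, as it might appear inside a job driver (imports omitted). The table, column names, and the MyRecord DBWritable class are hypothetical, and the JDBC connection is assumed to be configured separately on job.getConfiguration().

Job job = Job.getInstance(new Configuration(), "data-driven-import");
// Custom input query plus a bounding query whose MIN/MAX result drives the split points.
DataDrivenDBInputFormat.setInput(job, MyRecord.class,
        "SELECT id, name FROM employees",            // inputQuery (hypothetical)
        "SELECT MIN(id), MAX(id) FROM employees");   // inputBoundingQuery (hypothetical)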

From source file:co.nubetech.apache.hadoop.DBInputFormat.java

License:Apache License

/**
 * Initializes the map-part of the job with the appropriate input settings.
 *
 * @param job
 *            The map-reduce job
 * @param inputClass
 *            the class object implementing DBWritable, which is the Java
 *            object holding tuple fields.
 * @param tableName
 *            The table to read data from
 * @param conditions
 *            The condition with which to select data, e.g. '(updated >
 *            20070101 AND length > 0)'
 * @param orderBy
 *            the fieldNames in the orderBy clause.
 * @param fieldNames
 *            The field names in the table
 * @see #setInput(Job, Class, String, String)
 */
public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,
        String conditions, String orderBy, String... fieldNames) {
    job.setInputFormatClass(DBInputFormat.class);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputFieldNames(fieldNames);
    dbConf.setInputConditions(conditions);
    dbConf.setInputOrderBy(orderBy);
}
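
A minimal usage sketch for the table-based overload above (imports and JDBC connection setup omitted); the table, columns, conditions, and the MyRecord DBWritable class are hypothetical.

Job job = Job.getInstance(new Configuration(), "db-table-import");
DBInputFormat.setInput(job, MyRecord.class,
        "employees",                 // tableName (hypothetical)
        "(salary > 0)",              // conditions
        "id",                        // orderBy
        "id", "name", "salary");     // fieldNames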

From source file:co.nubetech.apache.hadoop.DBInputFormat.java

License:Apache License

/**
 * Initializes the map-part of the job with the appropriate input settings.
 *
 * @param job
 *            The map-reduce job
 * @param inputClass
 *            the class object implementing DBWritable, which is the Java
 *            object holding tuple fields.
 * @param inputQuery
 *            the input query to select fields. Example :
 *            "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
 * @param inputCountQuery
 *            the input query that returns the number of records in the
 *            table. Example : "SELECT COUNT(f1) FROM Mytable"
 * @see #setInput(Job, Class, String, String, String, String...)
 */
public static void setInput(Job job, Class<? extends DBWritable> inputClass, String inputQuery,
        String inputCountQuery) {
    job.setInputFormatClass(DBInputFormat.class);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setInputClass(inputClass);
    dbConf.setInputQuery(inputQuery);
    dbConf.setInputCountQuery(inputCountQuery);
}
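
A minimal usage sketch for the query-based overload above (imports and JDBC connection setup omitted); both queries and the MyRecord DBWritable class are hypothetical.

Job job = Job.getInstance(new Configuration(), "db-query-import");
DBInputFormat.setInput(job, MyRecord.class,
        "SELECT id, name FROM employees ORDER BY id",   // inputQuery
        "SELECT COUNT(id) FROM employees");             // inputCountQuery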

From source file:co.nubetech.apache.hadoop.DBOutputFormat.java

License:Apache License

private static DBConfiguration setOutput(Job job, String tableName) throws IOException {
    job.setOutputFormatClass(DBOutputFormat.class);
    // job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());

    dbConf.setOutputTableName(tableName);
    return dbConf;
}

From source file:co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.java

License:Apache License

/**
 * Note that the "orderBy" column is called the "splitBy" in this version.
 * We reuse the same field, but it's not strictly ordering it -- just
 * partitioning the results.
 */
public static void setInput(Job job, String tableName, String conditions, String splitBy, ArrayList params,
        String... fieldNames) throws IOException {
    DBInputFormat.setInput(job, GenericDBWritable.class, tableName, conditions, splitBy, fieldNames);
    if (params != null) {
        DefaultStringifier<ArrayList> stringifier = new DefaultStringifier<ArrayList>(job.getConfiguration(),
                ArrayList.class);
        job.getConfiguration().set(HIHOConf.QUERY_PARAMS, stringifier.toString(params));
        logger.debug("Converted params and saved them into config");
    }
    job.setInputFormatClass(DBQueryInputFormat.class);
}
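
A hedged usage sketch for the overload above (imports omitted). The table, columns, and parameter value are hypothetical, and the way the stringified params are bound to the query downstream is assumed rather than shown here.

Job job = Job.getInstance(new Configuration(), "db-partitioned-import");
ArrayList<String> params = new ArrayList<String>();
params.add("20070101"); // hypothetical value, assumed to be bound as a query parameter
DBQueryInputFormat.setInput(job, "employees",
        "(updated > ?)",           // conditions (placeholder usage is an assumption)
        "id",                      // splitBy column: used to partition, not to order
        params,
        "id", "name", "updated");  // fieldNames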

From source file:co.nubetech.hiho.dedup.DedupJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}

From source file:co.nubetech.hiho.dedup.DelimitedTextInputFormat.java

License:Apache License

public static void setProperties(Job job, String delim, int column) {
    job.getConfiguration().set(DELIMITER_CONF, delim);
    job.getConfiguration().setInt(COLUMN_CONF, column);
}

From source file:co.nubetech.hiho.dedup.TestDelimitedTextInputFormat.java

License:Apache License

@Test
public void testSetProperties() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    DelimitedTextInputFormat.setProperties(job, ",", 1);
    assertEquals(",", job.getConfiguration().get(DelimitedTextInputFormat.DELIMITER_CONF));
    assertEquals("1", job.getConfiguration().get(DelimitedTextInputFormat.COLUMN_CONF));
}