List of usage examples for the getConfiguration() method of org.apache.hadoop.mapreduce.Job
public Configuration getConfiguration()
From source file:co.cask.hydrator.plugin.batchSource.KafkaBatchSource.java
License:Apache License
/**
 * Prepares the batch source run.
 *
 * <p>Obtains a configuration from a freshly created job, looks up the dataset named by
 * {@code config.getTableName()}, hands both to {@code KafkaInputFormat.saveKafkaRequests}
 * together with the topic, broker map, partitions and initial partition offsets from the
 * plugin config, and finally registers {@code KafkaInputFormat} as the input under
 * {@code config.referenceName}.
 *
 * @param context the batch source context used to fetch the dataset and set the input
 * @throws Exception propagated from any of the calls made here
 */
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
    Configuration conf = JobUtils.createInstance().getConfiguration();

    table = context.getDataset(config.getTableName());
    kafkaRequests = KafkaInputFormat.saveKafkaRequests(
            conf,
            config.getTopic(),
            config.getBrokerMap(),
            config.getPartitions(),
            config.getInitialPartitionOffsets(),
            table);

    context.setInput(
            Input.of(config.referenceName, new SourceInputFormatProvider(KafkaInputFormat.class, conf)));
}
From source file:co.cask.hydrator.plugin.sink.HBaseSink.java
License:Apache License
@Override public void prepareRun(BatchSinkContext context) throws Exception { Job job; ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); // Switch the context classloader to plugin class' classloader (PluginClassLoader) so that // when Job/Configuration is created, it uses PluginClassLoader to load resources (hbase-default.xml) // which is present in the plugin jar and is not visible in the CombineClassLoader (which is what oldClassLoader // points to). Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); try {// ww w . ja v a 2 s. com job = JobUtils.createInstance(); } finally { // Switch back to the original Thread.currentThread().setContextClassLoader(oldClassLoader); } Configuration conf = job.getConfiguration(); HBaseConfiguration.addHbaseResources(conf); context.addOutput(Output.of(config.referenceName, new HBaseOutputFormatProvider(config, conf)) .alias(config.columnFamily)); }
From source file:co.nubetech.apache.hadoop.DataDrivenDBInputFormat.java
License:Apache License
/** * setInput() takes a custom query and a separate "bounding query" to use * instead of the custom "count query" used by DBInputFormat. *///from www.j a va 2 s. co m public static void setInput(Job job, Class<? extends DBWritable> inputClass, String inputQuery, String inputBoundingQuery) { DBInputFormat.setInput(job, inputClass, inputQuery, ""); job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery); job.setInputFormatClass(DataDrivenDBInputFormat.class); }
From source file:co.nubetech.apache.hadoop.DBInputFormat.java
License:Apache License
/**
 * Initializes the map-part of the job with table-based input settings.
 *
 * @param job        the map-reduce job
 * @param inputClass the class object implementing DBWritable, which is the Java object
 *                   holding tuple fields
 * @param tableName  the table to read data from
 * @param conditions the condition with which to select data,
 *                   e.g. '(updated > 20070101 AND length > 0)'
 * @param orderBy    the field names in the orderBy clause
 * @param fieldNames the field names in the table
 * @see #setInput(Job, Class, String, String)
 */
public static void setInput(
        Job job,
        Class<? extends DBWritable> inputClass,
        String tableName,
        String conditions,
        String orderBy,
        String... fieldNames) {
    job.setInputFormatClass(DBInputFormat.class);

    // Every remaining setting is written through a DBConfiguration wrapping the job's configuration.
    DBConfiguration inputSettings = new DBConfiguration(job.getConfiguration());
    inputSettings.setInputClass(inputClass);
    inputSettings.setInputTableName(tableName);
    inputSettings.setInputFieldNames(fieldNames);
    inputSettings.setInputConditions(conditions);
    inputSettings.setInputOrderBy(orderBy);
}
From source file:co.nubetech.apache.hadoop.DBInputFormat.java
License:Apache License
/**
 * Initializes the map-part of the job with query-based input settings.
 *
 * @param job             the map-reduce job
 * @param inputClass      the class object implementing DBWritable, which is the Java object
 *                        holding tuple fields
 * @param inputQuery      the input query to select fields,
 *                        e.g. "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
 * @param inputCountQuery the input query that returns the number of records in the table,
 *                        e.g. "SELECT COUNT(f1) FROM Mytable"
 * @see #setInput(Job, Class, String, String, String, String...)
 */
public static void setInput(
        Job job,
        Class<? extends DBWritable> inputClass,
        String inputQuery,
        String inputCountQuery) {
    job.setInputFormatClass(DBInputFormat.class);

    // Both queries are written through a DBConfiguration wrapping the job's configuration.
    DBConfiguration inputSettings = new DBConfiguration(job.getConfiguration());
    inputSettings.setInputClass(inputClass);
    inputSettings.setInputQuery(inputQuery);
    inputSettings.setInputCountQuery(inputCountQuery);
}
From source file:co.nubetech.apache.hadoop.DBOutputFormat.java
License:Apache License
private static DBConfiguration setOutput(Job job, String tableName) throws IOException { job.setOutputFormatClass(DBOutputFormat.class); // job.setReduceSpeculativeExecution(false); DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); dbConf.setOutputTableName(tableName); return dbConf; }
From source file:co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.java
License:Apache License
/** * Note that the "orderBy" column is called the "splitBy" in this version. * We reuse the same field, but it's not strictly ordering it -- just * partitioning the results./*from w w w . j a va 2s . co m*/ */ public static void setInput(Job job, String tableName, String conditions, String splitBy, ArrayList params, String... fieldNames) throws IOException { DBInputFormat.setInput(job, GenericDBWritable.class, tableName, conditions, splitBy, fieldNames); if (params != null) { DefaultStringifier<ArrayList> stringifier = new DefaultStringifier<ArrayList>(job.getConfiguration(), ArrayList.class); job.getConfiguration().set(HIHOConf.QUERY_PARAMS, stringifier.toString(params)); logger.debug("Converted params and saved them into config"); } job.setInputFormatClass(DBQueryInputFormat.class); }
From source file:co.nubetech.hiho.dedup.DedupJob.java
License:Apache License
/**
 * Configures and runs the dedup map-reduce job, then records its counters.
 *
 * <p>The mapper/reducer pair and the map output value class are selected by {@code dedupBy}
 * ("key" or "value"). Input/output formats and key/value classes are loaded by name from the
 * fields filled in by {@code populateConfiguration}. After the job finishes, the
 * {@code DedupRecordCounter} counters are copied into {@code totalRecordsRead},
 * {@code badRecords}, {@code output} and {@code duplicateRecords}.
 *
 * @param args command-line arguments, forwarded to {@code populateConfiguration}
 * @return 0 if the job completed successfully; 1 if the job failed or an exception was
 *         raised while submitting/running it (previously 0 was returned in every case,
 *         which made failures indistinguishable from success)
 * @throws Exception if a mandatory configuration is missing (wrapping the HIHOException),
 *                   or if one of the configured classes cannot be loaded
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        // Log through the class logger rather than printStackTrace(); the cause is preserved.
        logger.error("Mandatory configuration check failed", e1);
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    boolean succeeded;
    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());

        // Keep the job outcome so it can be reflected in the exit code.
        succeeded = job.waitForCompletion(false);

        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;

            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
        }
        // Still not rethrown (same as before), but now logged and reported as a failure.
        logger.error("Dedup job failed", e);
        return 1;
    }
    return succeeded ? 0 : 1;
}
From source file:co.nubetech.hiho.dedup.DelimitedTextInputFormat.java
License:Apache License
/**
 * Stores the delimiter and column settings in the job's configuration.
 *
 * @param job    the job whose configuration is updated
 * @param delim  value stored under {@code DELIMITER_CONF}
 * @param column value stored as an int under {@code COLUMN_CONF}
 */
public static void setProperties(Job job, String delim, int column) {
    // Delimiter first, then the column, each written straight into the job configuration.
    job.getConfiguration()
            .set(DELIMITER_CONF, delim);
    job.getConfiguration()
            .setInt(COLUMN_CONF, column);
}
From source file:co.nubetech.hiho.dedup.TestDelimitedTextInputFormat.java
License:Apache License
/**
 * Verifies that {@code DelimitedTextInputFormat.setProperties} writes the delimiter and the
 * column into the job configuration, and that both can be read back as strings.
 */
@Test
public void testSetProperties() throws IOException {
    Job job = new Job(new Configuration());

    DelimitedTextInputFormat.setProperties(job, ",", 1);

    String storedDelimiter = job.getConfiguration().get(DelimitedTextInputFormat.DELIMITER_CONF);
    assertEquals(",", storedDelimiter);

    // The column was set as an int; reading it back with get() yields its string form.
    String storedColumn = job.getConfiguration().get(DelimitedTextInputFormat.COLUMN_CONF);
    assertEquals("1", storedColumn);
}