List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
From source file:co.nubetech.hiho.job.ExportDelimitedToDB.java
License:Apache License
public int run(String[] args) throws IOException { Configuration conf = getConf(); Job job = new Job(conf); job.setJobName("MySQLBulkLoading"); job.setMapperClass(DelimitedLoadMapper.class); job.setJarByClass(DelimitedLoadMapper.class); job.setNumReduceTasks(0);//www .j a v a 2s .c o m job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path(args[0])); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputFormatClass(DBOutputFormat.class); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.job.ExportToDB.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); populateConfiguration(args, conf);/* www . j av a 2 s. co m*/ try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new Exception(e1); } Job job = new Job(conf); job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1)); job.setJobName("HihoDBExport"); job.setMapperClass(GenericDBLoadDataMapper.class); job.setJarByClass(ExportToDB.class); job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path(inputPath)); GenericDBOutputFormat.setOutput(job, tableName, columnNames); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.job.ExportToFTPServer.java
License:Apache License
@Override public int run(String[] args) throws IOException { Configuration conf = getConf(); populateConfiguration(args, conf);//from w ww.jav a2s .com try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new IOException(e1); } for (Entry<String, String> entry : conf) { logger.debug("key, value " + entry.getKey() + "=" + entry.getValue()); } Job job = new Job(conf); job.setMapperClass(TokenCounterMapper.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path(inputPath)); job.setReducerClass(IntSumReducer.class); job.setOutputFormatClass(FTPTextOutputFormat.class); FTPTextOutputFormat.setOutputPath(job, new Path(outputPath)); job.setJarByClass(ExportToFTPServer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setNumReduceTasks(2); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.job.ExportToMySQLDB.java
License:Apache License
@Override public int run(String[] args) throws IOException { Configuration conf = getConf(); populateConfiguration(args, conf);// w w w .ja va 2s .c om try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new IOException(e1); } Job job = new Job(conf); job.setJobName("MySQLBulkLoading"); job.setMapperClass(MySQLLoadDataMapper.class); job.setJarByClass(MySQLLoadDataMapper.class); for (Entry<String, String> entry : conf) { logger.debug("key, value " + entry.getKey() + "=" + entry.getValue()); } // verify required properties are loaded logger.debug(conf.get(DBConfiguration.URL_PROPERTY)); logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY)); logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY)); job.setNumReduceTasks(0); job.setInputFormatClass(FileStreamInputFormat.class); FileStreamInputFormat.addInputPath(job, new Path(inputPath)); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); // job.setJarByClass(com.mysql.jdbc.Driver.class); job.setOutputFormatClass(NullOutputFormat.class); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.job.ExportToOracleDb.java
License:Apache License
@Override public int run(String[] args) throws IOException { Configuration conf = getConf(); for (Entry<String, String> entry : conf) { logger.debug("key, value " + entry.getKey() + "=" + entry.getValue()); }//from w w w .j av a2 s. c om for (int i = 0; i < args.length; i++) { logger.debug("Remaining arguments are" + " " + args[i]); } populateConfiguration(args, conf); try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new IOException(e1); } Job job = new Job(conf); job.setJobName("OracleLoading"); job.setMapperClass(OracleLoadMapper.class); job.setJarByClass(ExportToOracleDb.class); job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1)); try { // we first create the external table definition String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML); // create table if user has specified if (query != null) { this.runQuery(query, conf); } } catch (HIHOException e1) { e1.printStackTrace(); } // verify required properties are loaded job.setNumReduceTasks(0); job.setInputFormatClass(FileStreamInputFormat.class); FileStreamInputFormat.addInputPath(job, new Path(inputPath)); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); // job.setJarByClass(com.mysql.jdbc.Driver.class); job.setOutputFormatClass(NullOutputFormat.class); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } // run alter table query and add locations try { this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf); } catch (HIHOException e1) { e1.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.job.sf.ExportSalesForceJob.java
License:Apache License
@Override public int run(String[] arg0) throws Exception { Configuration conf = getConf(); populateConfiguration(arg0, conf);/*from w ww. ja v a2 s . co m*/ try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new Exception(e1); } Job job = new Job(conf); job.setJobName("SaleForceLoading"); job.setMapperClass(SalesForceLoadMapper.class); job.setJarByClass(SalesForceLoadMapper.class); job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path(inputPath)); // NLineInputFormat.setNumLinesPerSplit(job, 10); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputFormatClass(NullOutputFormat.class); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.mapreduce.lib.db.DBQueryInputFormat.java
License:Apache License
/** * setInput() takes a custom query and a separate "bounding query" to use * instead of the custom "count query" used by DBInputFormat. *//*from w w w . j av a 2 s . com*/ public static void setInput(Job job, String inputQuery, String inputBoundingQuery, ArrayList params) throws IOException { DBInputFormat.setInput(job, GenericDBWritable.class, inputQuery, ""); if (inputBoundingQuery != null) { job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery); } if (params != null) { DefaultStringifier<ArrayList> stringifier = new DefaultStringifier<ArrayList>(job.getConfiguration(), ArrayList.class); job.getConfiguration().set(HIHOConf.QUERY_PARAMS, stringifier.toString(params)); logger.debug("Converted params and saved them into config"); } job.setInputFormatClass(DBQueryInputFormat.class); }
From source file:co.nubetech.hiho.merge.MergeJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { populateConfiguration(args);/*from www. j a v a 2 s .c o m*/ try { checkMandatoryConfs(); } catch (HIHOException e1) { e1.printStackTrace(); throw new Exception(e1); } Class inputFormatClass = Class.forName(inputFormat); Class outputFormatClass = Class.forName(outputFormat); Class inputKeyClass = Class.forName(inputKeyClassName); Class inputValueClass = Class.forName(inputValueClassName); Configuration conf = getConf(); conf.set(HIHOConf.MERGE_OLD_PATH, oldPath); conf.set(HIHOConf.MERGE_NEW_PATH, newPath); Job job = new Job(conf); job.setJobName("Merge job"); job.setJarByClass(MergeJob.class); if (mergeBy.equals("key")) { job.setMapperClass(MergeKeyMapper.class); job.setReducerClass(MergeKeyReducer.class); } else if (mergeBy.equals("value")) { job.setMapperClass(MergeValueMapper.class); job.setReducerClass(MergeValueReducer.class); } job.setInputFormatClass(inputFormatClass); DelimitedTextInputFormat.setProperties(job, delimiter, column); job.setMapOutputKeyClass(HihoTuple.class); job.setMapOutputValueClass(HihoValue.class); job.setOutputKeyClass(inputKeyClass); job.setOutputValueClass(inputValueClass); FileInputFormat.setInputPaths(job, oldPath + "," + newPath); job.setOutputFormatClass(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(outputPath)); try { logger.debug("Output format class is " + job.getOutputFormatClass()); logger.debug("Class is " + ReflectionUtils .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName()); job.waitForCompletion(false); if (job.isComplete()) { Counters counters = job.getCounters(); totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue(); totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue(); badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue(); output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue(); logger.info("Total old records read are: " + totalRecordsOld); logger.info("Total new records read are: " + totalRecordsNew); logger.info("Bad Records are: " + badRecords); logger.info("Output records are: " + output); } } catch (Exception e) { e.printStackTrace(); } return 0; }
From source file:co.nubetech.hiho.similarity.ngram.NGramJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); populateConfiguration(args);/*from w w w. j a v a2 s . c o m*/ try { checkMandatoryConfs(); } catch (HIHOException e1) { e1.printStackTrace(); throw new Exception(e1); } Job job = new Job(conf); job.setJobName("NGram job"); job.setJarByClass(NGramJob.class); Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat"); Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"); // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat Class inputKeyClass = Class.forName("org.apache.hadoop.io.Text"); Class inputValueClass = Class.forName("org.apache.hadoop.io.Text"); Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair"); Class outputValueClass = Class.forName("org.apache.hadoop.io.IntWritable"); job.setMapperClass(NGramMapper.class); job.setReducerClass(NGramReducer.class); job.setInputFormatClass(inputFormatClass); job.setMapOutputKeyClass(inputKeyClass); job.setMapOutputValueClass(inputValueClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); job.setOutputFormatClass(outputFormatClass); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, new Path("outputOfNGramJob")); int ret = 0; try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:co.nubetech.hiho.similarity.ngram.ScoreJob.java
License:Apache License
@Override public int run(String[] arg0) throws Exception { Configuration conf = getConf(); Job job = new Job(conf); job.setJobName("Score job"); job.setJarByClass(ScoreJob.class); Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"); Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"); // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat Class inputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair"); Class inputValueClass = Class.forName("org.apache.hadoop.io.IntWritable"); Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair"); Class outputValueClass = Class.forName("org.apache.hadoop.io.LongWritable"); job.setMapperClass(ScoreMapper.class); job.setReducerClass(ScoreReducer.class); job.setInputFormatClass(inputFormatClass); job.setMapOutputKeyClass(inputKeyClass); job.setMapOutputValueClass(inputValueClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); job.setOutputFormatClass(outputFormatClass); FileInputFormat.setInputPaths(job, "outputOfNGramJob"); FileOutputFormat.setOutputPath(job, new Path("outputOfScoreJob")); int ret = 0;//from w w w . j a v a 2 s .c o m try { ret = job.waitForCompletion(true) ? 0 : 1; } catch (Exception e) { e.printStackTrace(); } return ret; }