List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException
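setOutputKeyClass declares the class of the keys in the job's final output (the reducer's output key, or the mapper's when the job has no reduce phase); it throws IllegalStateException if called after the job has been submitted. When the map output key type differs from the final output key type, it must be declared separately with setMapOutputKeyClass. A minimal usage sketch, assuming hypothetical MyJob, MyMapper and MyReducer classes:

    // Minimal sketch; MyJob, MyMapper and MyReducer are hypothetical placeholders.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "example");
    job.setJarByClass(MyJob.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);          // class of keys written by the reducer
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("in"));    // illustrative paths
    FileOutputFormat.setOutputPath(job, new Path("out"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);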
From source file:com.elixir.hadoop.Chromo.FragmentCoverage.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);
    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);
    job.setMapOutputKeyClass(com.elixir.hadoop.Chromo.SecondrySort.IntPair.class);
    //job.setSpeculativeExecution(true);
    job.setPartitionerClass(ChromoPartitioner.class);
    job.setGroupingComparatorClass(com.elixir.hadoop.Chromo.SecondrySort.FirstGroupingComparator.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // job.setOutputFormatClass(Text.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
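Note that this example calls both setMapOutputKeyClass and setOutputKeyClass: the map output key (the IntPair composite used for secondary sort) differs from the final output key (Text), so it has to be declared separately; without setMapOutputKeyClass, the class passed to setOutputKeyClass would apply to both stages.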
From source file:com.elixir.hadoop.FragmentCoverage.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);
    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.elixir.hadoop.OddEven.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "oddeven");
    job.setJarByClass(OddEven.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.elixir.hadoop.Word.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.elixir.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ema.hadoop.bestclient.BestClient.java
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");
    // setStrings is available on Configuration directly; no JobConf cast needed
    job.getConfiguration().setStrings("dates", args[2], args[3]);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ema.hadoop.wordcount.WordCount.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);
    job.setJobName("Word count job");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ema.hadoop.wordcount.WordCount_cache.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    // First we write the stop word list;
    // it could also be a file manually loaded into HDFS
    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }
    br.close();
    hdfs.close();
    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());
    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    // validate the output table name
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "HBase output table name <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // validate the output field names and derive the column family names
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));
    // validate the row key rule
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "Row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // HFile max file size must be positive
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "HFile max file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // memstore flush size (flush to HDFS) must be positive
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "Memstore flush size <" + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // column block size must be positive
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "Column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // column max version must be positive
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "Column max version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // column min version must be positive
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "Column min version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // commit buffer length must be positive
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "Commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // HBase WAL flag must be -1 or in the range [0-4]
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG + "> must be -1 or in the range [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // validate the target table
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Exception!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }
    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);
    // print the table description
    printTableDesc(tableName, hConf.getConf());
}
From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
    // validate the row separator
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    // validate the field separator
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    boolean para = hconf.getOutputHiveCompress();
    // validate the compression codec (see HDFSUtils.CompressCodec)
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]Compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + "> does not exist.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    // target path of the MR output
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }
    // validate the Hive connection URL
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // Hive user name
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive user name <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }
    // Hive password
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }
    // validate the output table name
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // validate the partition fields
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        // every partition field must appear in the output field names
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "Output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }
            if (!isExist) {
                String meg = "Partition field " + partitionField[i] + " <"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not among the output field names <"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }
        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "Order temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "Order file name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.warn(LOG, meg);
        }
        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "Order file max count <" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + "> must be greater than 0, or -1 (unlimited).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }
    // DDL statement to run before the job
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive DDL statement <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}