List of usage examples for org.apache.hadoop.mapreduce.Job#setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
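Before the project examples, here is a minimal, self-contained sketch of a typical call site (a hypothetical driver written for this page, not taken from any of the projects below). setOutputFormatClass must be called while the job is still being defined, i.e. before submit() or waitForCompletion(); calling it afterwards is what triggers the IllegalStateException declared in the signature.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "output-format-example");
        job.setJarByClass(OutputFormatExample.class);
        // Default (identity) Mapper and Reducer are used, so no mapper/reducer classes are set.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        // Must be called while the job is in the DEFINE state, i.e. before
        // submit()/waitForCompletion(); otherwise an IllegalStateException is thrown.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}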
From source file:com.elex.dmp.vectorizer.FixDictionaryVectorizer.java
License:Apache License
/**
 * Count the frequencies of words in parallel using Map/Reduce. The input documents have to be in
 * {@link SequenceFile} format
 */
private static void startWordCounting(Path input, Path output, Configuration baseConf, int minSupport)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt(MIN_SUPPORT, minSupport);

    Job job = new Job(conf);
    job.setJobName("DictionaryVectorizer::WordCount: input-folder: " + input);
    job.setJarByClass(FixDictionaryVectorizer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(TermCountMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setCombinerClass(TermCountCombiner.class);
    job.setReducerClass(TermCountReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    HadoopUtil.delete(conf, output);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.ery.hadoop.mrddx.file.RCFileOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    // Wrap the job configuration for file-output settings.
    FileConfiguration dbconf = new FileConfiguration(conf.getConfiguration(),
            FileConfiguration.FLAG_FILE_OUTPUT);

    // Row separator must be set.
    String outRowChars = dbconf.getOutputFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Missing parameter <" + FileConfiguration.OUTPUT_FILE_ROWS_SPLIT_CHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Field separator must be set.
    String outFileSplitChars = dbconf.getOutputFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Missing parameter <" + FileConfiguration.OUTPUT_FILE_FIELD_SPLIT_CHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = dbconf.getOutputFileCompress();

    // Compression codec must be one of HDFSUtils.CompressCodec when compression is enabled.
    String outCompressCodec = dbconf.getOutputFileCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] Invalid codec for <" + FileConfiguration.OUTPUT_FILE_COMPRESSCODEC + ">.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // BZip2Codec is not supported by this output format.
    if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + "> must not be BZip2Codec.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output (optional, warn if missing).
    String outTargetpath = dbconf.getOutputTargetFilePath();
    dbconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR output target path <" + FileConfiguration.OUTPUT_FILE_TARGET_PATH + "> is not set");
    }

    setColumnNumber(conf.getConfiguration(), dbconf.getOutputFieldNames().length);
    conf.setOutputFormatClass(RCFileOutputFormat.class);
    conf.setReducerClass(DBReducer.class);
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License:Apache License
/**
 * Set up the job to use this output format.
 *
 * @param job the job
 * @param tableName the HBase table name
 * @return HbaseConfiguration the HBase output configuration
 */
public static HbaseConfiguration setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HbaseOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HbaseConfiguration dbConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    dbConf.setOutputHBaseTableName(tableName);
    return dbConf;
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // Target HBase table name must be set.
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "HBase table name <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the output field names and derive the column family names.
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // Row key rule must be set.
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "Row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // HFile max file size must be greater than 0.
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "HFile max file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Memstore flush size (flush to HDFS) must be greater than 0.
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "Memstore flush size <" + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Column block size must be greater than 0.
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "Column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Column max version must be greater than 0.
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "Column max version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Column min version must be greater than 0.
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "Column min version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Commit buffer length must be greater than 0.
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "Commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH + "> must be greater than 0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // HBase WAL flag must be -1 or within the range [0, 4].
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG + "> must be -1 or in the range [0-4]";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The target table must exist and match the configured schema.
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Exception!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Log the table descriptor for debugging.
    printTableDesc(tableName, hConf.getConf());
}
From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License:Apache License
/**
 * Set up the job to use this output format.
 *
 * @param job the job
 * @param tableName the Hive table name
 */
public static void setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HiveOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HiveConfiguration dbConf = new HiveConfiguration(job.getConfiguration());
    dbConf.setOutputHiveTableName(tableName);
}
From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

    // Row separator must be set.
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Missing parameter <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Field separator must be set.
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Missing parameter <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();

    // Compression codec must be one of HDFSUtils.CompressCodec when compression is enabled.
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] Invalid codec for <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + ">.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output (optional, warn if missing).
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set");
    }

    // Hive connection URL must be set.
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR] Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive user name (optional, warn if missing).
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN] Hive user <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }

    // Hive password (optional, warn if missing).
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN] Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }

    // Target Hive table name must be set.
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR] Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Partition fields (optional); when present, each must appear among the output field names.
    String[] partitionField = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "Missing parameter <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }
            if (!isExist) {
                String meg = "Partition field " + partitionField[i] + " <"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not among <"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        // Temporary path for the ordered partition output must be set.
        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "Missing parameter <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Output file name prefix (optional, warn if missing).
        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "File name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.warn(LOG, meg);
        }

        // Max file count must be greater than 0, or -1 for unlimited.
        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "<" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + "> must be greater than 0, or -1 (unlimited).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // DDL statement to run before the job (optional, warn if missing).
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN] Hive DDL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }

    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);

    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}
From source file:com.ery.hadoop.mrddx.hive.HiveRCFileOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    super.handle(conf);
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

    // Compression codec (one of HDFSUtils.CompressCodec); BZip2Codec is not supported here.
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + "> must not be BZip2Codec.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    setColumnNumber(conf.getConfiguration(), hconf.getOutputFieldNames().length);
    conf.setOutputFormatClass(HiveRCFileOutputFormat.class);
}
From source file:com.ery.hadoop.mrddx.hive.HiveSequenceFileOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    super.handle(conf);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormatClass(HiveSequenceFileOutputFormat.class);
}
From source file:com.example.bigtable.sample.CellCounter.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);
    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("at least 2 parameters are necessary to run this jar: \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());

    // When you are not running any Reducer: job.setNumReduceTasks(0);
    logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}