List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
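A combiner is an optional map-side Reducer that pre-aggregates map output before the shuffle. The framework may run it zero, one, or several times per key group, so it is only safe for associative and commutative operations, and its input and output key/value types must both match the map output types. The listings below register combiners but never show one, so here is a minimal sketch of the kind of sum combiner they use, assuming the usual word-count types Text/IntWritable:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Minimal sketch of a combiner-safe Reducer. Input and output types both
// match the map output (Text, IntWritable), so the same class can serve
// as both the combiner and the reducer.
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        // Summation is associative and commutative, so applying this class
        // zero or more times as a combiner does not change the final result.
        context.write(key, result);
    }
}

It is registered with job.setCombinerClass(IntSumReducer.class), exactly as in the examples that follow.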
From source file:com.elixir.hadoop.Word.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.elixir.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    // Read the HBase input configuration.
    HbaseConfiguration hconf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_INPUT);
    String tableName = hconf.getInputTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]HBase input table name <" + HbaseConfiguration.INPUT_TABLE + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    // Validate the source/target field names.
    String[] inputFieldName = hconf.getInputFieldNames();
    this.vParamSrcTargetFieldNames(hconf, inputFieldName);
    if (hconf.getInputIsCombiner()) {
        conf.setCombinerClass(DBGroupReducer.class);
    }
    // Validate the TIMERANGE query parameter.
    String[] timerange = hconf.getInputHBaseQueryTimerange();
    this.vParamQueryTimeRange(timerange);
    // Check the startrow query parameter.
    String startrow = hconf.getInputHBaseQueryStartRow();
    if (null == startrow || startrow.trim().length() <= 0) {
        MRLog.warn(LOG, "[MR WARN]startrow <" + HbaseConfiguration.INPUT_QUERY_STARTROW + "> is not set.");
    }
    // Check the stoprow query parameter.
    String stoprow = hconf.getInputHBaseQueryStopRow();
    if (null == stoprow || stoprow.trim().length() <= 0) {
        MRLog.warn(LOG, "[MR WARN]stoprow <" + HbaseConfiguration.INPUT_QUERY_STOPROW + "> is not set.");
    }
    // Check the timestamp query parameter.
    long timestamp = hconf.getInputHBaseQueryTimestamp();
    if (timestamp <= -1) {
        MRLog.warn(LOG, "[MR WARN]timestamp <" + HbaseConfiguration.INPUT_QUERY_TIMESTAMP + "> is not set.");
    }
    // Check the filters query parameter.
    String filters = hconf.getInputHBaseQueryFilters();
    if (null == filters || filters.length() <= 0) {
        MRLog.warn(LOG, "[MR WARN]filters <" + HbaseConfiguration.INPUT_QUERY_FILTER + "> is not set.");
    }
    // Check the familyColumns query parameter.
    String[] familyColumns = hconf.getInputHBaseQueryFamilyColumns();
    if (null == familyColumns || familyColumns.length <= 0) {
        MRLog.warn(LOG, "[MR WARN]familyColumns <" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + "> is not set.");
    }
    if (null != familyColumns) {
        for (String tmp : familyColumns) {
            if (tmp.split(":").length != 2) {
                String meg = "[MR ERROR]familyColumns <" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS
                        + "> is malformed (expected family:column).";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }
    }
    // Check the familys query parameter.
    String[] familys = hconf.getInputHBaseQueryFamilys();
    if (null == familys || familys.length <= 0) {
        MRLog.warn(LOG, "[MR WARN]familys <" + HbaseConfiguration.INPUT_QUERY_FAMILYS + "> is not set.");
    }
    conf.setInputFormatClass(HbaseInputFormat.class);
    hconf.setInputClass(DBRecord.class);
    // Derive the task counts from the table's HRegion count.
    int taskNumber = HbaseInputFormat.getTableHRegionInfoCount(conf.getConfiguration(), startrow, stoprow);
    int reduceTasks = taskNumber;
    if (hconf.getInputMapEnd()) {
        reduceTasks = 0;
    }
    // hconf.setNumMapTasks(taskNumber);
    hconf.setNumReduceTasks(reduceTasks);
    hconf.setInputClass(DBRecord.class);
    conf.setMapperClass(DBMapper.class);
    conf.setMapOutputKeyClass(DBRecord.class);
    conf.setMapOutputValueClass(DBRecord.class);
    if (hconf.getInputIsCombiner()) {
        conf.setCombinerClass(DBGroupReducer.class);
    }
}
From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    // Read the Hive output configuration.
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
    // Row separator for the output file.
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    // Field separator for the output file.
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    boolean para = hconf.getOutputHiveCompress();
    // Compression codec (see HDFSUtils.CompressCodec).
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]Compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> is not supported.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }
    // Target path for the MR output.
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }
    // Hive connection URL.
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // Hive user name.
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive user name <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }
    // Hive password.
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }
    // Output table name.
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }
    // Partition fields, if set, must be a subset of the output field names.
    String[] partitionField = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "Output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }
            if (!isExist) {
                String meg = "Partition field " + partitionField[i] + " <"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not among the output field names <"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }
        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "Order temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "Order file name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.warn(LOG, meg);
        }
        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "Order file max count <" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + "> must be greater than 0, or -1 (unlimited).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }
    // DDL statement to execute before the job runs.
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive DDL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);
    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));
    if (args.length < 2) {
        logger.warn("to run this jar at least 2 parameters are necessary: " + job.getJar()
                + " input_files output_directory");
        return 1;
    }
    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());
    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any Reducer:
    // job.setNumReduceTasks(0);
    logger.info("number of reduce task is " + job.getNumReduceTasks());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());
    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());
    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);
    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.waitForCompletion(true);
    return 0;
}
From source file:com.frdeso.app.Sleepy.java
License:Apache License
/**
 * Performs integer summation of all the values for each key.
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: wordmean <in> <out> <number of seconds>");
        return 0;
    }
    Configuration conf = getConf();
    conf.set("mapSleepTime", args[2]);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "joba");
    job.setJarByClass(Sleepy.class);
    job.setMapperClass(SleepyMapper.class);
    job.setCombinerClass(Reducer.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 * Prepares an Avro map/reduce job. Key and value types may be given either as
 * Avro Schemas or as plain classes; formats left null are inferred from the
 * schemas where possible.
 *
 * @param inputPaths   comma-separated input paths
 * @param outputPath   output directory
 * @param inputFormat  input format class, or null to infer from the input key/value schemas
 * @param inputKey     input key Schema or Class
 * @param inputValue   input value Schema or Class
 * @param mapper       mapper class
 * @param mapperKey    map output key Schema or Class
 * @param mapperValue  map output value Schema or Class
 * @param combiner     combiner class, or null for none
 * @param reducer      reducer class, or null for a map-only job
 * @param outputKey    output key Schema or Class
 * @param outputValue  output value Schema or Class
 * @param outputFormat output format class, or null to infer from the output key/value schemas
 * @param conf         configuration
 * @param overwrite    whether to delete an existing output path
 * @param isCompress   whether to compress the output
 * @return the configured Job
 * @throws IOException
 */
public static Job prepareAvroJob(String inputPaths, String outputPath,
        Class<? extends InputFormat> inputFormat, Object inputKey, Object inputValue,
        Class<? extends Mapper> mapper, Object mapperKey, Object mapperValue,
        Class<? extends Reducer> combiner, Class<? extends Reducer> reducer,
        Object outputKey, Object outputValue, Class<? extends OutputFormat> outputFormat,
        Configuration conf, boolean overwrite, boolean isCompress) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (inputKey instanceof Schema) {
        if (inputValue instanceof Schema) {
            inputFormat = inputFormat == null ? AvroKeyValueInputFormat.class : inputFormat;
        }
        inputFormat = inputFormat == null ? AvroKeyInputFormat.class : inputFormat;
    }
    if (inputFormat != null) {
        job.setInputFormatClass(inputFormat);
    }
    if (inputKey instanceof Schema) {
        AvroJob.setInputKeySchema(job, (Schema) inputKey);
    }
    if (inputValue instanceof Schema) {
        AvroJob.setInputValueSchema(job, (Schema) inputValue);
    }
    if (outputKey instanceof Schema) {
        if (outputValue instanceof Schema) {
            outputFormat = outputFormat == null ? AvroKeyValueOutputFormat.class : outputFormat;
        }
        outputFormat = outputFormat == null ? AvroKeyOutputFormat.class : outputFormat;
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(outputFormat);
    }
    if (outputKey instanceof Schema) {
        AvroJob.setOutputKeySchema(job, (Schema) outputKey);
    } else if (outputKey instanceof Class) {
        job.setOutputKeyClass((Class) outputKey);
    }
    if (outputValue instanceof Schema) {
        AvroJob.setOutputValueSchema(job, (Schema) outputValue);
    } else if (outputValue instanceof Class) {
        job.setOutputValueClass((Class) outputValue);
    }
    if (reducer == null) {
        // Map-only job: the map output is the job output.
        job.setNumReduceTasks(0);
        if (mapperKey instanceof Schema) {
            AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
        } else if (mapperKey instanceof Class) {
            job.setOutputKeyClass((Class) mapperKey);
        }
        if (mapperValue instanceof Schema) {
            AvroJob.setOutputValueSchema(job, (Schema) mapperValue);
        } else if (mapperValue instanceof Class) {
            job.setOutputValueClass((Class) mapperValue);
        }
        job.setJarByClass(mapper);
    } else if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    if (isCompress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    }
    job.setMapperClass(mapper);
    if (mapperKey instanceof Schema) {
        AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputKeyClass((Class) mapperKey);
    }
    if (mapperValue instanceof Schema) {
        AvroJob.setMapOutputValueSchema(job, (Schema) mapperValue);
    } else if (mapperValue instanceof Class) {
        job.setMapOutputValueClass((Class) mapperValue);
    }
    if (reducer != null) {
        job.setReducerClass(reducer);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    if (overwrite) {
        HadoopUtils.delete(jobConf, new Path(outputPath));
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Schema outputKeySchema,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite)
        throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setReducerClass(reducer);
    AvroJob.setOutputKeySchema(job, outputKeySchema);
    job.setOutputValueClass(outputValue);
    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Class<? extends Writable> outputKey,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite)
        throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, true);
    // FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(reducer);
    job.setOutputKeyClass(outputKey);
    job.setOutputValueClass(outputValue);
    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }
    return job;
}
From source file:com.github.sample.mapreduce.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    System.out.println("coder");
    //conf.addResource("etc/hadoop/hadoop-local.xml");
    //conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    //conf.setClass("mapreduce.output.fileoutputformat.compress.codec", GzipCodec.class, CompressionCodec.class);
    conf.set("fs.default.name", "hdfs://localhost:9000");
    /*
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}