List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java
License:Apache License
public Job run() throws Exception { Job job = Job.getInstance(getConf()); job.setJobName(name);//from ww w . j a v a2 s .co m job.setJarByClass(TopKPhaseJob.class); Configuration configuration = job.getConfiguration(); FileSystem fs = FileSystem.get(configuration); // Properties LOGGER.info("Properties {}", props); // Input Path String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH); LOGGER.info("Input path dir: " + inputPathDir); for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) { LOGGER.info("Adding input:" + inputPath); Path input = new Path(inputPath); FileInputFormat.addInputPath(job, input); } // Output path Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH)); LOGGER.info("Output path dir: " + outputPath.toString()); if (fs.exists(outputPath)) { fs.delete(outputPath, true); } FileOutputFormat.setOutputPath(job, outputPath); // Schema Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir); LOGGER.info("Schema : {}", avroSchema.toString(true)); // ThirdEyeConfig String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty( props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()), props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema); props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty); ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props); LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode()); job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig)); // Map config job.setMapperClass(TopKPhaseMapper.class); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); // Combiner job.setCombinerClass(TopKPhaseCombiner.class); // Reduce config job.setReducerClass(TopKPhaseReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.setNumReduceTasks(1); job.waitForCompletion(true); return job; }
From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java
License:Open Source License
@Override public int run(String[] arg) throws Exception { try {/*from ww w . ja va2 s .c o m*/ // ??? if (arg.length > 2) { BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]); } Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName()); job.setJarByClass(BoxLog.class); /***/ Path inputPath = new Path(arg[0]); FileInputFormat.addInputPath(job, inputPath); job.setMapperClass(BoxMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); /***/ Path outputPath = new Path(arg[1]); FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration()); if (fileSystem.exists(outputPath)) { fileSystem.delete(outputPath, true); } FileOutputFormat.setOutputPath(job, outputPath); job.setReducerClass(BoxReduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); boolean status = job.waitForCompletion(true); if (!status) { throw new Exception("MapReduce task execute failed........."); } return 0; } catch (Exception e) { e.printStackTrace(); return 1; } }
From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java
License:Open Source License
@Override public int run(String[] arg) throws Exception { try {/*w w w .j ava 2 s . c o m*/ Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName()); job.setJarByClass(MobileLog.class); /***/ Path inputPath = new Path(arg[0]); FileInputFormat.addInputPath(job, inputPath); job.setMapperClass(MobileMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); /***/ Path outputPath = new Path(arg[1]); FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration()); if (fileSystem.exists(outputPath)) { fileSystem.delete(outputPath, true); } FileOutputFormat.setOutputPath(job, outputPath); job.setReducerClass(MobileReduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); boolean status = job.waitForCompletion(true); if (!status) { throw new Exception("MapReduce task execute failed........."); } return 0; } catch (Exception e) { e.printStackTrace(); return 1; } }
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java
License:Open Source License
@Override public int run(String[] args) throws Exception { try {/*w w w. ja va 2 s . com*/ CommonVariables.initMapReduce(this.getConf(), args);// ? MAP REDUCE String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex); String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath); String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath); // ??? if (StringUtils.isNotBlank(matcherRegex)) { CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex); } Path inputPath = new Path(hdfsInputPath); Path outputPath = new Path(hdfsOutputPath); Job job = Job.getInstance(this.getConf(), this.getClass().getName()); job.setJarByClass(this.getClass()); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(CDFMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(GeneralReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf()); if (fileSystem.exists(outputPath)) { fileSystem.delete(outputPath, true); } boolean status = job.waitForCompletion(true); if (!status) { throw new Exception("MapReduce task execute failed........."); } return 0; } catch (Exception e) { e.printStackTrace(); return 1; } }
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override public int run(String[] args) throws Exception { try {/*from ww w . jav a 2 s . co m*/ CommonVariables.initMapReduce(this.getConf(), args);// ? MAP REDUCE CommonVariables.hBaseHelper = new HBaseHelper(this.getConf()); // ?? String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex); String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath); String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath); // ??? if (StringUtils.isNotBlank(matcherRegex)) { CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex); } Path inputPath = new Path(hdfsInputPath); Path outputPath = new Path(hdfsOutputPath); Job job = Job.getInstance(this.getConf(), this.getClass().getName()); job.setJarByClass(this.getClass()); job.setMapperClass(UserMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(UserReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf()); if (fileSystem.exists(outputPath)) { fileSystem.delete(outputPath, true); } // boolean state = job.waitForCompletion(true); if (!state) { throw new Exception("MapReduce task execute failed........."); } return 0; } catch (Exception e) { e.printStackTrace(); return 1; } }
From source file:com.marklogic.mapreduce.examples.BinaryReader.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length < 2) { System.err.println("Usage: BinaryReader configFile outputDir"); System.exit(2);/* w w w .jav a 2 s.c o m*/ } String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance(conf, "binary reader"); job.setJarByClass(BinaryReader.class); job.setInputFormatClass(DocumentInputFormat.class); job.setMapperClass(DocMapper.class); job.setMapOutputKeyClass(DocumentURI.class); job.setMapOutputValueClass(BytesWritable.class); job.setOutputFormatClass(BinaryOutputFormat.class); job.setOutputKeyClass(DocumentURI.class); job.setOutputValueClass(BytesWritable.class); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); conf = job.getConfiguration(); conf.addResource(otherArgs[0]); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.marklogic.mapreduce.examples.HelloWorld.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance(conf, "hello world"); job.setJarByClass(HelloWorld.class); // Map related configuration job.setInputFormatClass(DocumentInputFormat.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); // Reduce related configuration job.setReducerClass(MyReducer.class); job.setOutputFormatClass(ContentOutputFormat.class); job.setOutputKeyClass(DocumentURI.class); job.setOutputValueClass(Text.class); conf = job.getConfiguration();// w w w.j av a2 s .c om conf.addResource("marklogic-hello-world.xml"); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.marklogic.mapreduce.examples.LinkCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length < 2) { System.err.println("Usage: LinkCount configFile outputDir"); System.exit(2);// ww w .j a v a 2 s. c o m } String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance(conf, "link count"); job.setJarByClass(LinkCount.class); job.setInputFormatClass(ValueInputFormat.class); job.setMapperClass(RefMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); conf = job.getConfiguration(); conf.addResource(otherArgs[0]); conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.marklogic.mapreduce.examples.LinkCountCooccurrences.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length < 2) { System.err.println("Usage: LinkCountCooccurrences configFile outputDir"); System.exit(2);/* w ww . j av a 2 s . c o m*/ } String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance(conf, "link count cooccurrences"); job.setJarByClass(LinkCountCooccurrences.class); job.setInputFormatClass(KeyValueInputFormat.class); job.setMapperClass(RefMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); conf = job.getConfiguration(); conf.addResource(otherArgs[0]); conf.setClass(MarkLogicConstants.INPUT_KEY_CLASS, Text.class, Writable.class); conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class); conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, HrefTitleMap.class, ElemAttrValueCooccurrences.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length < 2) { System.err.println("Usage: LinkCountHDFS inputDir outputDir"); System.exit(2);/*from w w w . jav a2 s.c o m*/ } String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance(conf, "link count hdfs"); job.setJarByClass(LinkCountHDFS.class); job.setInputFormatClass(HDFSInputFormat.class); job.setMapperClass(RefMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(IntSumReducer.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }