List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.vrecord.VrecordIngressMR.java
License:Open Source License
public void run(int numProcs, String inputpath, String outputpath) throws IOException { JobConf conf = new JobConf(VrecordIngressMR.class); conf.setJobName("Vrecord Mapreduce"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setMapperClass(VrecordIngressMapper.class); conf.setReducerClass(VrecordIngressReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(MultiDirOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); if (gzip) {//from ww w . jav a 2 s. co m TextOutputFormat.setCompressOutput(conf, true); TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class); } LOG.info("====== Job: Distributed Vertex Records to partitions ========="); LOG.info("input: " + inputpath); LOG.info("output: " + outputpath); LOG.info("numProc = " + numProcs); LOG.info("gzip = " + Boolean.toString(gzip)); LOG.info("=============================================================="); JobClient.runJob(conf); LOG.info("==========================Done==============================="); }
From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java
License:Apache License
/**
 * Assembles the {@code JobConf} for a data join job from positional arguments.
 *
 * <p>Argument layout:
 * <ul>
 *   <li>{@code args[0]} - input directory</li>
 *   <li>{@code args[1]} - output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code args[2]} - {@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>{@code args[3]} - number of reduce tasks</li>
 *   <li>{@code args[4]}, {@code args[5]}, {@code args[6]} - mapper, reducer and map output
 *       value class, each resolved through {@code getClassByName}</li>
 *   <li>{@code args[7]} - {@code "text"} (case-insensitive) selects {@code TextOutputFormat}
 *       with {@code Text} values; anything else is resolved through {@code getClassByName}
 *       as the output value class and {@code SequenceFileOutputFormat} is used</li>
 *   <li>{@code args[8]} - optional, maximum number of values per group (default 100)</li>
 *   <li>{@code args[9]} - optional, job name suffix (default empty)</li>
 * </ul>
 *
 * @param args positional arguments as described above
 * @return the configured, not yet submitted, job configuration
 * @throws IOException declared by the file system calls used to clear the output directory
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    final String inputDir = args[0];
    final String outputDir = args[1];

    // Input side: "text" means plain text, everything else means sequence files.
    final Class inputFormat;
    if (args[2].compareToIgnoreCase("text") == 0) {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    } else {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
        inputFormat = SequenceFileInputFormat.class;
    }

    final int numOfReducers = Integer.parseInt(args[3]);
    final Class mapper = getClassByName(args[4]);
    final Class reducer = getClassByName(args[5]);
    final Class mapoutputValueClass = getClassByName(args[6]);

    // Output side: "text" keeps Text values, otherwise args[7] names the value class.
    final Class outputFormat;
    final Class outputValueClass;
    if (args[7].compareToIgnoreCase("text") == 0) {
        System.out.println("Using TextOutputFormat: " + args[7]);
        outputFormat = TextOutputFormat.class;
        outputValueClass = Text.class;
    } else {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    }

    // Optional trailing arguments.
    final long maxNumOfValuesPerGroup = args.length > 8 ? Long.parseLong(args[8]) : 100;
    final String jobName = args.length > 9 ? args[9] : "";

    final Configuration defaults = new Configuration();
    final JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    // Clear any previous output before wiring up the paths.
    FileSystem.get(defaults).delete(new Path(outputDir), true);

    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);

    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
private static JobConf createJobConf(Configuration conf) { JobConf jobconf = new JobConf(conf, DistExec.class); jobconf.setJobName(NAME);// w w w .j a v a 2 s. c o m // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobconf.setMapSpeculativeExecution(false); jobconf.setInputFormat(ExecInputFormat.class); jobconf.setOutputKeyClass(Text.class); jobconf.setOutputValueClass(Text.class); jobconf.setMapperClass(ExecFilesMapper.class); jobconf.setNumReduceTasks(0); // TODO implement singleOut by setting single reducer and prepending file name to output return jobconf; }
From source file:com.liveramp.hank.hadoop.HadoopDomainBuilder.java
License:Apache License
@Override protected void configureJob(JobConf conf) { // Input specification conf.setInputFormat(inputFormatClass); FileInputFormat.setInputPaths(conf, inputPath); // Mapper class and key/value classes conf.setMapperClass(mapperClass);//from w ww. ja v a 2 s . co m conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class); conf.setMapOutputValueClass(ValueWritable.class); // Reducer class and key/value classes conf.setReducerClass(DomainBuilderReducer.class); conf.setOutputKeyClass(KeyAndPartitionWritable.class); conf.setOutputValueClass(ValueWritable.class); // Partitioner conf.setPartitionerClass(DomainBuilderPartitioner.class); }
From source file:com.liveramp.hank.hadoop.HadoopDomainCompactor.java
License:Apache License
@Override protected void configureJob(JobConf conf) { // Input format conf.setInputFormat(HadoopDomainCompactorInputFormat.class); // Mappers//ww w. j ava2s .co m conf.setMapperClass(HadoopDomainCompactorMapper.class); conf.setMapOutputKeyClass(KeyAndPartitionWritable.class); conf.setMapOutputValueClass(ValueWritable.class); // No reducers conf.setNumReduceTasks(0); // Output conf.setOutputKeyClass(KeyAndPartitionWritable.class); conf.setOutputValueClass(ValueWritable.class); }
From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java
License:Apache License
/**
 * Builds the {@code JobConf} for the optimized data join job from positional arguments.
 *
 * <p>Argument layout:
 * <ul>
 *   <li>{@code args[0]} - input directory</li>
 *   <li>{@code args[1]} - output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code args[2]} - {@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>{@code args[3]} - number of reduce tasks</li>
 *   <li>{@code args[4]}, {@code args[5]}, {@code args[6]} - mapper, reducer and map output
 *       value class, each resolved through {@code getClassByName}</li>
 *   <li>{@code args[7]} - {@code "text"} (case-insensitive) selects {@code TextOutputFormat}
 *       with {@code Text} values; anything else is resolved through {@code getClassByName}
 *       as the output value class and {@code SequenceFileOutputFormat} is used</li>
 *   <li>{@code args[8]} - optional, maximum number of values per group (default 100)</li>
 *   <li>{@code args[9]} - optional, job name suffix (default empty)</li>
 * </ul>
 *
 * <p>Map output is keyed by {@code CompositeKey}, partitioned by
 * {@code CompositeKeyPartitioner}, sorted with {@code CompositeKeyComparator} and grouped
 * with {@code CompositeKeyOnlyComparator}.
 *
 * @param args positional arguments as described above
 * @return the configured, not yet submitted, job configuration
 * @throws IOException declared by the file system calls used to clear the output directory
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String inputDir = args[0];
    String outputDir = args[1];

    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }

    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);

    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }

    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }

    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    // Clear any previous output. Use the explicit recursive overload rather than the
    // deprecated single-argument delete(Path).
    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);

    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);

    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    // Partitioning, sort order and reduce-side grouping for the composite map output key.
    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
@Override public void sinkConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader<AvroWrapper<Record>, Writable>, OutputCollector<AvroWrapper<Record>, Writable>> tap, JobConf conf) { conf.set(AvroJob.OUTPUT_SCHEMA, dataSchema.toString()); conf.setOutputFormat(AvroOutputFormat.class); conf.setOutputKeyClass(AvroWrapper.class); // set compression AvroOutputFormat.setDeflateLevel(conf, 6); AvroJob.setOutputCodec(conf, DataFileConstants.DEFLATE_CODEC); AvroOutputFormat.setSyncInterval(conf, 1048576); }
From source file:com.mh2c.WikipediaDumpLoaderDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { // arg checks JobConf conf = new JobConf(getClass()); conf.setJobName("WP dump loader"); // Set the mapper class, but skip the reduce phase conf.setMapperClass(WikipediaDumpLoaderMapper.class); conf.setNumReduceTasks(0);/*from w ww.j a v a 2s . co m*/ // The object key/value pairs are text conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); // Stream XML into the job conf.setInputFormat(StreamInputFormat.class); StreamInputFormat.addInputPath(conf, new Path(args[0])); // Use the XML record reader, with each page as one record conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); conf.set("stream.recordreader.begin", "<page>"); conf.set("stream.recordreader.end", "</page>"); // Emit sequence files conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
From source file:com.mh2c.WikipediaWordCountDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { // arg checks JobConf conf = new JobConf(getClass()); conf.setJobName("WP word count"); // Set the mapper and reducer classes, and use the reducer as a combiner conf.setMapperClass(WikipediaWordCountMapper.class); conf.setReducerClass(WikipediaWordCountReducer.class); conf.setCombinerClass(WikipediaWordCountReducer.class); // The object key/value pairs are text words and integer counts conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); // Read in sequence files conf.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(conf, new Path(args[0])); // Emit ordinary text files conf.setOutputFormat(TextOutputFormat.class); TextOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w. j a va2 s .c o m*/ return 0; }
From source file:com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfigV2.java
License:Apache License
public int run(final String[] args) throws Exception { final Configuration conf = getConf(); final JobConf job = new JobConf(conf); job.setReducerClass(TreasuryYieldReducerV2.class); job.setMapperClass(TreasuryYieldMapperV2.class); job.setOutputFormat(MongoOutputFormat.class); job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf)); job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf)); job.setMapOutputKeyClass(MongoConfigUtil.getMapperOutputKey(conf)); job.setMapOutputValueClass(MongoConfigUtil.getMapperOutputValue(conf)); job.setInputFormat(MongoInputFormat.class); JobClient.runJob(job);//w w w.j av a2 s . co m return 0; }