List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
From source file:com.ibm.jaql.fail.io.ErrorOutputConfigurator.java
License:Apache License
/**
 * Declares the key/value classes for the job and then passes the configuration
 * to {@code HadoopSerializationDefault.register}.
 *
 * @param conf the job configuration to update
 */
@Override
protected void registerSerializers(JobConf conf) {
  // Final job output: LongWritable keys paired with ErrorWritable values.
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(ErrorWritable.class);

  // Map-side output uses the same holder class for both key and value.
  final Class<JsonHolderDefault> holderClass = JsonHolderDefault.class;
  conf.setMapOutputKeyClass(holderClass);
  conf.setMapOutputValueClass(holderClass);

  HadoopSerializationDefault.register(conf);
}
From source file:com.ibm.jaql.io.hadoop.TextFileOutputConfigurator.java
License:Apache License
/**
 * Declares the key/value classes for the job: the map output uses
 * {@code JsonHolderDefault} for both key and value, and the final output is
 * written as {@code NullWritable} keys with {@code Text} values.
 *
 * @param conf the job configuration to update
 */
@Override
protected void registerSerializers(JobConf conf) {
  // Final job output classes.
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);

  // Map-side output uses the same holder class for both key and value.
  final Class<JsonHolderDefault> holderClass = JsonHolderDefault.class;
  conf.setMapOutputKeyClass(holderClass);
  conf.setMapOutputValueClass(holderClass);
}
From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.HashIdMR.java
License:Open Source License
/**
 * Configures and runs the job that creates integer id maps for vertices.
 *
 * @param inputpath
 *          the path to a unique vertex list. Each line is parsed into (vid,
 *          data) using {@code vidparser} and {@code vdataparser}.
 * @param outputpath
 *          the path of output directory.
 * @throws IOException
 *           if configuring or running the job fails with an I/O error.
 */
public void run(String inputpath, String outputpath) throws IOException {
  JobConf conf = new JobConf(HashIdMR.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(Text.class);

  conf.setMapperClass(HashIdMapper.class);
  conf.setReducerClass(HashIdReducer.class);

  conf.setInputFormat(NLineInputFormat.class);
  conf.setOutputFormat(MultiDirOutputFormat.class);

  conf.setInt("mapred.line.input.format.linespermap", linespermap);
  // The parser implementations are passed to the tasks by class name.
  conf.set("GraphParser", graphparser.getClass().getName());
  conf.set("VidParser", vidparser.getClass().getName());
  conf.set("VdataParser", vdataparser.getClass().getName());

  FileInputFormat.setInputPaths(conf, new Path(inputpath));
  FileOutputFormat.setOutputPath(conf, new Path(outputpath));

  LOG.info("====== Job: Create integer Id maps for vertices ==========");
  LOG.info("Input = " + inputpath);
  LOG.info("Output = " + outputpath);
  // Report the value actually configured above rather than a fixed number.
  LOG.debug("Lines per map = " + linespermap);
  LOG.debug("GraphParser = " + graphparser.getClass().getName());
  LOG.debug("VidParser = " + vidparser.getClass().getName());
  LOG.debug("VdataParser = " + vdataparser.getClass().getName());
  LOG.info("==========================================================");
  JobClient.runJob(conf);
  LOG.info("=======================Done =====================\n");
}
From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictMR.java
License:Open Source License
/** * @param inputpath/*w ww .j a v a 2 s . com*/ * the path to a rawId to newId dictionary. * @param outputpath * the path of output directory. * @throws IOException */ public void run(String inputpath, String outputpath) throws IOException { JobConf conf = new JobConf(SortDictMR.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setMapperClass(SortDictMapper.class); conf.setReducerClass(SortDictReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setBoolean("hashRawVid", hashRawVid); conf.setInt("numChunks", numChunks); conf.set("VidParser", vidparser.getClass().getName()); String outprefix = "vidhashmap"; for (int i = 0; i < numChunks; i++) { MultipleOutputs.addNamedOutput(conf, outprefix + i, TextOutputFormat.class, Text.class, Text.class); } FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); LOG.info("========== Job: Partition the map of rawid -> id ==========="); LOG.info("Input = " + inputpath); LOG.info("Output = " + outputpath); LOG.info("======================================================"); if (hashRawVid) LOG.info("Partition on rawId."); else LOG.info("Partition on newId"); LOG.debug("numChunks = " + numChunks); LOG.debug("VidParser = " + vidparser.getClass().getName()); JobClient.runJob(conf); LOG.info("======================= Done ==========================\n"); }
From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.TransEdgeMR.java
License:Open Source License
/** * @param inputpath/*from w w w .ja v a2 s. c om*/ * path of the partitioned edge list * @param outputpath * path of the output directory * @throws IOException */ public void run(String inputpath, String outputpath) throws IOException { JobConf conf = new JobConf(TransEdgeMR.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setMapperClass(TransEdgeMapper.class); conf.setReducerClass(TransEdgeReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setInt("numChunks", numChunks); conf.set("GraphParser", graphparser.getClass().getName()); conf.set("VidParser", vidparser.getClass().getName()); conf.set("EdataParser", edataparser.getClass().getName()); conf.set("dictionaryPath", dictionaryPath); FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); LOG.info("============= Job: Normalize Ids in Edges ===================="); LOG.info("Input = " + inputpath); LOG.info("Output = " + outputpath); LOG.info("Dictionary = " + dictionaryPath); LOG.debug("numChunks = " + numChunks); LOG.debug("GraphParser = " + graphparser.getClass().getName()); LOG.debug("VidParser = " + vidparser.getClass().getName()); LOG.debug("EdataParser = " + edataparser.getClass().getName()); LOG.info("==============================================================="); JobClient.runJob(conf); LOG.info("========================= Done ==============================="); }
From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.vrecord.VrecordIngressMR.java
License:Open Source License
/**
 * Configures and runs the "Vrecord Mapreduce" job.
 *
 * @param numProcs
 *          value reported in the job banner; this method only logs it.
 * @param inputpath
 *          path handed to the job as its input.
 * @param outputpath
 *          path handed to the job as its output directory.
 * @throws IOException
 *           if configuring or running the job fails with an I/O error.
 */
public void run(int numProcs, String inputpath, String outputpath) throws IOException {
  final JobConf conf = new JobConf(VrecordIngressMR.class);
  conf.setJobName("Vrecord Mapreduce");

  // Map side.
  conf.setMapperClass(VrecordIngressMapper.class);
  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(Text.class);

  // Reduce side.
  conf.setReducerClass(VrecordIngressReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  // Input.
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(inputpath));

  // Output.
  conf.setOutputFormat(MultiDirOutputFormat.class);
  FileOutputFormat.setOutputPath(conf, new Path(outputpath));

  // Gzip-compress the output only when the gzip flag is set.
  if (gzip) {
    TextOutputFormat.setCompressOutput(conf, true);
    TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
  }

  final String gzipText = Boolean.toString(gzip);
  LOG.info("====== Job: Distributed Vertex Records to partitions =========");
  LOG.info("input: " + inputpath);
  LOG.info("output: " + outputpath);
  LOG.info("numProc = " + numProcs);
  LOG.info("gzip = " + gzipText);
  LOG.info("==============================================================");

  JobClient.runJob(conf);

  LOG.info("==========================Done===============================");
}
From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java
License:Apache License
public static JobConf createDataJoinJob(String args[]) throws IOException { String inputDir = args[0];//from w w w . j a v a2 s . c o m String outputDir = args[1]; Class inputFormat = SequenceFileInputFormat.class; if (args[2].compareToIgnoreCase("text") != 0) { System.out.println("Using SequenceFileInputFormat: " + args[2]); } else { System.out.println("Using TextInputFormat: " + args[2]); inputFormat = TextInputFormat.class; } int numOfReducers = Integer.parseInt(args[3]); Class mapper = getClassByName(args[4]); Class reducer = getClassByName(args[5]); Class mapoutputValueClass = getClassByName(args[6]); Class outputFormat = TextOutputFormat.class; Class outputValueClass = Text.class; if (args[7].compareToIgnoreCase("text") != 0) { System.out.println("Using SequenceFileOutputFormat: " + args[7]); outputFormat = SequenceFileOutputFormat.class; outputValueClass = getClassByName(args[7]); } else { System.out.println("Using TextOutputFormat: " + args[7]); } long maxNumOfValuesPerGroup = 100; String jobName = ""; if (args.length > 8) { maxNumOfValuesPerGroup = Long.parseLong(args[8]); } if (args.length > 9) { jobName = args[9]; } Configuration defaults = new Configuration(); JobConf job = new JobConf(defaults, DataJoinJob.class); job.setJobName("DataJoinJob: " + jobName); FileSystem fs = FileSystem.get(defaults); fs.delete(new Path(outputDir), true); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormat(inputFormat); job.setMapperClass(mapper); FileOutputFormat.setOutputPath(job, new Path(outputDir)); job.setOutputFormat(outputFormat); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(mapoutputValueClass); job.setOutputKeyClass(Text.class); job.setOutputValueClass(outputValueClass); job.setReducerClass(reducer); job.setNumMapTasks(1); job.setNumReduceTasks(numOfReducers); job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup); return 
job; }
From source file:com.liveramp.hank.hadoop.HadoopDomainBuilder.java
License:Apache License
@Override protected void configureJob(JobConf conf) { // Input specification conf.setInputFormat(inputFormatClass); FileInputFormat.setInputPaths(conf, inputPath); // Mapper class and key/value classes conf.setMapperClass(mapperClass);//from w ww. j av a 2s . c om conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class); conf.setMapOutputValueClass(ValueWritable.class); // Reducer class and key/value classes conf.setReducerClass(DomainBuilderReducer.class); conf.setOutputKeyClass(KeyAndPartitionWritable.class); conf.setOutputValueClass(ValueWritable.class); // Partitioner conf.setPartitionerClass(DomainBuilderPartitioner.class); }
From source file:com.liveramp.hank.hadoop.HadoopDomainCompactor.java
License:Apache License
@Override protected void configureJob(JobConf conf) { // Input format conf.setInputFormat(HadoopDomainCompactorInputFormat.class); // Mappers/*from w w w. j av a2 s.c o m*/ conf.setMapperClass(HadoopDomainCompactorMapper.class); conf.setMapOutputKeyClass(KeyAndPartitionWritable.class); conf.setMapOutputValueClass(ValueWritable.class); // No reducers conf.setNumReduceTasks(0); // Output conf.setOutputKeyClass(KeyAndPartitionWritable.class); conf.setOutputValueClass(ValueWritable.class); }
From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java
License:Apache License
/**
 * Builds the {@code JobConf} for the optimized data-join job from positional
 * arguments.
 *
 * <p>Arguments, by index:
 * <ol start="0">
 *   <li>input directory</li>
 *   <li>output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>number of reduce tasks</li>
 *   <li>mapper class name</li>
 *   <li>reducer class name</li>
 *   <li>map output value class name</li>
 *   <li>{@code "text"} (case-insensitive) selects {@code TextOutputFormat} with
 *       {@code Text} values; anything else selects
 *       {@code SequenceFileOutputFormat} and is also used as the output value
 *       class name</li>
 *   <li>optional: maximum number of values per group (defaults to 100)</li>
 *   <li>optional: job name suffix (defaults to the empty string)</li>
 * </ol>
 *
 * @param args positional arguments as described above
 * @return the configured job
 * @throws IOException if the file system cannot be obtained or the output
 *         directory cannot be deleted
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];

  Class inputFormat = SequenceFileInputFormat.class;
  if (args[2].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
  } else {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  }

  int numOfReducers = Integer.parseInt(args[3]);
  Class mapper = getClassByName(args[4]);
  Class reducer = getClassByName(args[5]);
  Class mapoutputValueClass = getClassByName(args[6]);

  Class outputFormat = TextOutputFormat.class;
  Class outputValueClass = Text.class;
  if (args[7].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  } else {
    System.out.println("Using TextOutputFormat: " + args[7]);
  }

  long maxNumOfValuesPerGroup = 100;
  String jobName = "";
  if (args.length > 8) {
    maxNumOfValuesPerGroup = Long.parseLong(args[8]);
  }
  if (args.length > 9) {
    jobName = args[9];
  }

  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);

  // Use the explicit recursive overload; the single-argument delete(Path) is
  // deprecated in favor of delete(Path, boolean).
  FileSystem fs = FileSystem.get(defaults);
  fs.delete(new Path(outputDir), true);

  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormat(inputFormat);
  job.setMapperClass(mapper);

  FileOutputFormat.setOutputPath(job, new Path(outputDir));
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

  job.setMapOutputKeyClass(CompositeKey.class);
  job.setMapOutputValueClass(mapoutputValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);
  job.setReducerClass(reducer);

  // The composite map output key gets its own partitioner, sort comparator
  // and grouping comparator.
  job.setPartitionerClass(CompositeKeyPartitioner.class);
  job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
  job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

  job.setNumMapTasks(1);
  job.setNumReduceTasks(numOfReducers);

  job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
  return job;
}