List of usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
From source file:Brush.PreCorrect.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: PreCorrect"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(PreCorrect.class); conf.setJobName("PreCorrect " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(PreCorrectMapper.class); conf.setReducerClass(PreCorrectReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.QuickMark.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: QuickMark"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(QuickMark.class); //JobConf conf = new JobConf(Stats.class); conf.setJobName("QuickMark " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(QuickMarkMapper.class); conf.setReducerClass(QuickMarkReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.QuickMerge.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: QuickMerge"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); //JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(QuickMerge.class); conf.setJobName("QuickMerge " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(QuickMergeMapper.class); conf.setReducerClass(QuickMergeReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.RedundantRemoval.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: RedundantRemoval"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(RedundantRemoval.class); conf.setJobName("RedundantRemoval " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(RedundantRemovalMapper.class); conf.setReducerClass(RedundantRemovalReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.RemoveLowCoverage.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: RemoveLowCoverage"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(RemoveLowCoverage.class); conf.setJobName("RemoveLowCoverage " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(RemoveLowCoverageMapper.class); conf.setReducerClass(RemoveLowCoverageReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.Stats.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Stats"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Stats.class); conf.setJobName("Stats " + inputPath); BrushConfig.initializeConfiguration(conf); conf.setNumReduceTasks(1);//from w w w .ja va2 s . co m FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(StatsMapper.class); conf.setReducerClass(StatsReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.TipsRemoval.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: TipsRemoval"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(TipsRemoval.class); conf.setJobName("RemoveTips " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(TipsRemovalMapper.class); conf.setReducerClass(TipsRemovalReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.TransitiveReduction.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: TransitiveReduction"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); //JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(TransitiveReduction.class); conf.setJobName("TransitiveReduction " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(TransitiveReductionMapper.class); conf.setReducerClass(TransitiveReductionReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.VerifyOverlap.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: VerifyOverlap"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); //JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(VerifyOverlap.class); conf.setJobName("VerifyOverlap " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); //conf.setClass("mapred.output.compression.codec", GzipCodec.class,CompressionCodec.class); conf.setMapperClass(VerifyOverlapMapper.class); conf.setReducerClass(VerifyOverlapReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java
License:Apache License
/** * Sets up the actual job./* w ww . j a va2s . com*/ * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static JobConf createSubmittableJob(HBaseConfiguration conf, String[] args) throws IOException { JobConf jobConf = new JobConf(conf, HBaseWikipediaLoader.class); jobConf.setJobName(NAME); // Stream stuff jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); jobConf.set("stream.recordreader.begin", "<page>"); jobConf.set("stream.recordreader.end", "</page>"); jobConf.setSpeculativeExecution(false); jobConf.setMapOutputKeyClass(ImmutableBytesWritable.class); jobConf.setMapOutputValueClass(BatchUpdate.class); jobConf.setMapperClass(Map.class); jobConf.setNumReduceTasks(0); jobConf.setInputFormat(StreamInputFormat.class); jobConf.setOutputFormat(TableOutputFormat.class); jobConf.set(TableOutputFormat.OUTPUT_TABLE, TABLE); jobConf.setOutputKeyClass(ImmutableBytesWritable.class); jobConf.setOutputValueClass(BatchUpdate.class); StreamInputFormat.setInputPaths(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/" + NAME + "-" + System.currentTimeMillis())); return jobConf; }