List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
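Before the collected examples below, here is a minimal, hedged sketch of the call in context. It is not taken from any of the source files on this page; it uses Hadoop's built-in IdentityMapper/IdentityReducer from the old mapred API, and the job name and paths are illustrative. The class passed to setReducerClass must implement the org.apache.hadoop.mapred.Reducer interface.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetReducerClassSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetReducerClassSketch.class);
        conf.setJobName("setReducerClass-sketch"); // illustrative name
        conf.setInputFormat(TextInputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        // The call this page documents; the class must implement
        // org.apache.hadoop.mapred.Reducer.
        conf.setReducerClass(IdentityReducer.class);
        // TextInputFormat emits LongWritable keys and Text values, which the
        // identity mapper/reducer pass through unchanged.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}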
From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java
public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);
    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);
    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir);
    FileOutputFormat.setOutputPath(conf, outputDir);
    addDistributedCacheFile(conf, reference, "novoalign.reference");
    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }
    DistributedCache.createSymlink(conf);
    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);
    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");
    JobClient.runJob(conf);
}
From source file:edu.stolaf.cs.wmrserver.streaming.StreamJob.java
License:Apache License
public static void setStreamReducer(JobConf conf, String reduceCommand) {
    conf.setReducerClass(PipeReducer.class);
    try {
        conf.set("stream.reduce.streamprocessor", URLEncoder.encode(reduceCommand, "UTF-8"));
    } catch (java.io.UnsupportedEncodingException ex) {
        throw new RuntimeException("The sky is falling! Java doesn't support UTF-8.");
    }
}
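For context, a hedged usage sketch of the helper above; the shell command is illustrative, not from the source file:

JobConf conf = new JobConf(StreamJob.class);
// Any shell command can serve as the reduce step: PipeReducer streams the
// reducer's input records through the command's stdin/stdout.
StreamJob.setStreamReducer(conf, "/usr/bin/wc -l");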
From source file:edu.ub.ahstfg.indexer.Indexer.java
License:Open Source License
@Override
public int run(String[] arg0) throws Exception {
    LOG.info("Creating Hadoop job for Indexer.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(Indexer.class);
    LOG.info("Setting input path to '" + INPUT_PATH + "'");
    FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
    // Set filters if it's necessary.
    LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'");
    // Change URI to Path if it's necessary.
    FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job);
    if (fs.exists(new Path(OUTPUT_PATH))) {
        fs.delete(new Path(OUTPUT_PATH), true);
    }
    LOG.info("Setting output path to '" + OUTPUT_PATH + "'");
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
    FileOutputFormat.setCompressOutput(job, false);
    LOG.info("Setting input format.");
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(IndexOutputFormat.class);
    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IndexRecord.class);
    LOG.info("Setting mapper and reducer.");
    job.setMapperClass(IndexerMapper.class);
    job.setMapOutputValueClass(ParsedDocument.class);
    job.setReducerClass(IndexerReducer.class);
    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}
From source file:edu.ub.ahstfg.indexer.wordcount.WordCount.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    LOG.info("Creating Hadoop job for ARC input files word count.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(WordCount.class);
    LOG.info("Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // Set filters if it's necessary.
    LOG.info("Clearing the output path at '" + outputPath + "'");
    // Change URI to Path if it's necessary.
    FileSystem fs = FileSystem.get(new URI(outputPath), job);
    if (fs.exists(new Path(outputPath))) {
        fs.delete(new Path(outputPath), true);
    }
    LOG.info("Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);
    LOG.info("Setting input format.");
    // job.setInputFormat(TextInputFormat.class);
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);
    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    LOG.info("Setting mapper and reducer.");
    // job.setMapperClass(WordCountTextInputMapper.class);
    job.setMapperClass(WordCountArcInputMapper.class);
    job.setReducerClass(LongSumReducer.class);
    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}
From source file:edu.ubc.mirrors.holographs.mapreduce.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    JobConf job = new JobConf(getConf());
    job.setClassLoader(Driver.class.getClassLoader());
    job.setInputFormat(SnapshotObjectsOfTypeInputFormat.class);
    job.setMapperClass(InvokeMethodMapper.class);
    job.setCombinerClass(TextCountSumReducer.class);
    job.setReducerClass(TextCountSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.set("snapshotPath", args[0]);
    job.set("targetClassName", "org.eclipse.cdt.internal.core.dom.parser.cpp.CPPASTName");
    job.setInt("splitSize", 10000);
    job.setInt("maxNumObjects", 100000);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    String outputPath = args[1];
    int suffix = 2;
    while (new File(outputPath).exists()) {
        outputPath = args[1] + suffix++;
    }
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    JobClient.runJob(job);
    return 0;
}
From source file:edu.uci.ics.hyracks.imru.util.DataBalancer.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DataBalancer.class);
    job.setJobName(DataBalancer.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    if (args.length > 3) {
        if (args[3].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[3].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
From source file:edu.uci.ics.pregelix.core.util.DataGenerator.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DataGenerator.class);
    FileSystem dfs = FileSystem.get(job);
    String maxFile = "/maxtemp";
    dfs.delete(new Path(maxFile), true);

    job.setJobName(DataGenerator.class.getSimpleName() + "max ID");
    job.setMapperClass(MapMaxId.class);
    job.setCombinerClass(CombineMaxId.class);
    job.setReducerClass(ReduceMaxId.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(maxFile));
    job.setNumReduceTasks(1);
    JobClient.runJob(job);

    job = new JobConf(DataGenerator.class);
    job.set("hyracks.maxid.file", maxFile);
    job.setInt("hyracks.x", Integer.parseInt(args[2]));
    dfs.delete(new Path(args[1]), true);
    job.setJobName(DataGenerator.class.getSimpleName());
    job.setMapperClass(MapRecordGen.class);
    job.setReducerClass(ReduceRecordGen.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[3]));
    if (args.length > 4) {
        if (args[4].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[4].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
From source file:edu.uci.ics.pregelix.example.utils.CommonSource.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}
From source file:edu.uci.ics.pregelix.example.utils.FindLargest.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);
    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setCombinerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}
From source file:edu.uci.ics.pregelix.example.utils.VertexAggregator.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexAggregator.class);
    job.setJobName(VertexAggregator.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setCombinerClass(CombineRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}