List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:jobs.MatrixBlockSum.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[2])); if (args.length >= 4) conf.setInt("SR", Integer.parseInt(args[3])); if (args.length >= 5) conf.setInt("SC", Integer.parseInt(args[4])); conf.set("RESNAME", args[1]); //heap space - again - should be passed with the -D option conf.set("mapred.map.child.java.opts", "-Xmx3G"); conf.set("mapred.reduce.child.java.opts", "-Xmx3G"); //job/*from w ww. j ava 2s . co m*/ Job job1 = new Job(conf, "MatrixBlockSum"); job1.setJarByClass(MatrixBlockSum.class); // No Map FileInputFormat.addInputPath(job1, new Path(args[0])); job1.setInputFormatClass(SequenceFileInputFormat.class); //Reduce job1.setCombinerClass(MatrixBlockSumReducer.class); job1.setReducerClass(MatrixBlockSumReducer.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(MatrixBlock.class); FileOutputFormat.setOutputPath(job1, new Path(args[1])); job1.setOutputFormatClass(SequenceFileOutputFormat.class); //job1.setOutputFormatClass(TextOutputFormat.class); return job1.waitForCompletion(false) ? 0 : 1; }
From source file:kogiri.mapreduce.preprocess.indexing.stage2.KmerIndexBuilder.java
License:Open Source License
private int runJob(PreprocessorConfig ppConfig) throws Exception { // check config validatePreprocessorConfig(ppConfig); // configuration Configuration conf = this.getConf(); // set user configuration ppConfig.getClusterConfiguration().configureTo(conf); ppConfig.saveTo(conf);// w ww . j av a 2s .c o m Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath()); boolean job_result = true; List<Job> jobs = new ArrayList<Job>(); for (int round = 0; round < inputFiles.length; round++) { Path roundInputFile = inputFiles[round]; String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round; Job job = new Job(conf, "Kogiri Preprocessor - Building Kmer Indices (" + round + " of " + inputFiles.length + ")"); job.setJarByClass(KmerIndexBuilder.class); // Mapper job.setMapperClass(KmerIndexBuilderMapper.class); job.setInputFormatClass(FastaReadInputFormat.class); job.setMapOutputKeyClass(CompressedSequenceWritable.class); job.setMapOutputValueClass(CompressedIntArrayWritable.class); // Combiner job.setCombinerClass(KmerIndexBuilderCombiner.class); // Partitioner job.setPartitionerClass(KmerIndexBuilderPartitioner.class); // Reducer job.setReducerClass(KmerIndexBuilderReducer.class); // Specify key / value job.setOutputKeyClass(CompressedSequenceWritable.class); job.setOutputValueClass(CompressedIntArrayWritable.class); // Inputs FileInputFormat.addInputPaths(job, roundInputFile.toString()); LOG.info("Input file : "); LOG.info("> " + roundInputFile.toString()); String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName()); Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName); KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath); FileOutputFormat.setOutputPath(job, new Path(roundOutputPath)); job.setOutputFormatClass(MapFileOutputFormat.class); // Use many reducers int reducersPerNode = ppConfig.getClusterConfiguration().getMachineCores() / 2; 
if (reducersPerNode < 1) { reducersPerNode = 1; } int reducers = ppConfig.getClusterConfiguration().getMachineNum() * (ppConfig.getClusterConfiguration().getMachineCores() / 2); LOG.info("Reducers : " + reducers); job.setNumReduceTasks(reducers); // Execute job and return status boolean result = job.waitForCompletion(true); jobs.add(job); // commit results if (result) { commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath), new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize()); // create index of index createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(), ppConfig.getKmerSize()); } if (!result) { LOG.error("job failed at round " + round + " of " + inputFiles.length); job_result = false; break; } } // report if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) { Report report = new Report(); report.addJob(jobs); report.writeTo(ppConfig.getReportPath()); } return job_result ? 0 : 1; }
From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounter.java
License:Open Source License
private int runJob(ReadFrequencyCounterConfig rfConfig) throws Exception { // check config validateReadFrequencyCounterConfig(rfConfig); // configuration Configuration conf = this.getConf(); // set user configuration rfConfig.getClusterConfiguration().configureTo(conf); rfConfig.saveTo(conf);//from w w w. j av a 2s . c o m // table file Path tableFilePath = new Path(rfConfig.getKmerMatchPath(), KmerMatchHelper.makeKmerMatchTableFileName()); FileSystem fs = tableFilePath.getFileSystem(conf); KmerMatchFileMapping fileMapping = KmerMatchFileMapping.createInstance(fs, tableFilePath); Path[] inputFiles = KmerMatchHelper.getAllKmerMatchResultFilePath(conf, rfConfig.getKmerMatchPath()); // Register named outputs NamedOutputs namedOutputs = new NamedOutputs(); for (int i = 0; i < fileMapping.getSize(); i++) { String fastaFileName = fileMapping.getFastaFileFromID(i); namedOutputs.add(fastaFileName); } namedOutputs.saveTo(conf); boolean job_result = true; List<Job> jobs = new ArrayList<Job>(); for (int round = 0; round < fileMapping.getSize(); round++) { String roundOutputPath = rfConfig.getReadFrequencyPath() + "_round" + round; Job job = new Job(conf, "Kogiri Preprocessor - Computing Mode of Kmer Frequency (" + round + " of " + fileMapping.getSize() + ")"); job.setJarByClass(ModeCounter.class); // Mapper job.setMapperClass(ModeCounterMapper.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(MultiFileIntWritable.class); job.setMapOutputValueClass(CompressedIntArrayWritable.class); // Combiner job.setCombinerClass(ModeCounterCombiner.class); // Partitioner job.setPartitionerClass(ModeCounterPartitioner.class); // Reducer job.setReducerClass(ModeCounterReducer.class); // Specify key / value job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Inputs FileInputFormat.addInputPaths(job, FileSystemHelper.makeCommaSeparated(inputFiles)); ModeCounterConfig modeCounterConfig = new ModeCounterConfig(); 
modeCounterConfig.setMasterFileID(round); modeCounterConfig.saveTo(job.getConfiguration()); FileOutputFormat.setOutputPath(job, new Path(roundOutputPath)); job.setOutputFormatClass(TextOutputFormat.class); for (NamedOutputRecord namedOutput : namedOutputs.getRecord()) { MultipleOutputs.addNamedOutput(job, namedOutput.getIdentifier(), TextOutputFormat.class, Text.class, Text.class); } // Execute job and return status boolean result = job.waitForCompletion(true); jobs.add(job); // commit results if (result) { commitRoundOutputFiles(new Path(roundOutputPath), new Path(rfConfig.getReadFrequencyPath()), job.getConfiguration(), namedOutputs, round); } if (!result) { LOG.error("job failed at round " + round + " of " + fileMapping.getSize()); job_result = false; break; } } // report if (rfConfig.getReportPath() != null && !rfConfig.getReportPath().isEmpty()) { Report report = new Report(); report.addJob(jobs); report.writeTo(rfConfig.getReportPath()); } return job_result ? 0 : 1; }
From source file:libra.preprocess.stage2.KmerIndexBuilder.java
License:Apache License
private int runJob(PreprocessorConfig ppConfig) throws Exception { // check config validatePreprocessorConfig(ppConfig); // configuration Configuration conf = this.getConf(); // set user configuration ppConfig.saveTo(conf);// w w w . j a v a 2 s. co m Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath()); boolean job_result = true; List<Job> jobs = new ArrayList<Job>(); for (int round = 0; round < inputFiles.length; round++) { Path roundInputFile = inputFiles[round]; String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round; Job job = new Job(conf, "Libra Preprocessor - Building Kmer Indexes (" + round + " of " + inputFiles.length + ")"); job.setJarByClass(KmerIndexBuilder.class); // Mapper job.setMapperClass(KmerIndexBuilderMapper.class); FastaKmerInputFormat.setKmerSize(conf, ppConfig.getKmerSize()); job.setInputFormatClass(FastaKmerInputFormat.class); job.setMapOutputKeyClass(CompressedSequenceWritable.class); job.setMapOutputValueClass(IntWritable.class); // Combiner job.setCombinerClass(KmerIndexBuilderCombiner.class); // Partitioner job.setPartitionerClass(KmerIndexBuilderPartitioner.class); // Reducer job.setReducerClass(KmerIndexBuilderReducer.class); // Specify key / value job.setOutputKeyClass(CompressedSequenceWritable.class); job.setOutputValueClass(IntWritable.class); // Inputs FileInputFormat.addInputPaths(job, roundInputFile.toString()); LOG.info("Input file : "); LOG.info("> " + roundInputFile.toString()); String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName()); Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName); KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath); FileOutputFormat.setOutputPath(job, new Path(roundOutputPath)); job.setOutputFormatClass(MapFileOutputFormat.class); // Use many reducers int reducers = conf.getInt("mapred.reduce.tasks", 0); if (reducers <= 0) { int MRNodes = 
MapReduceClusterHelper.getNodeNum(conf); reducers = MRNodes * 2; job.setNumReduceTasks(reducers); } LOG.info("Reducers : " + reducers); // Execute job and return status boolean result = job.waitForCompletion(true); jobs.add(job); // commit results if (result) { commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath), new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize()); // create index of index createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(), ppConfig.getKmerSize()); // create statistics of index createStatisticsOfIndex(new Path(ppConfig.getKmerStatisticsPath()), roundInputFile, job.getConfiguration(), job.getCounters(), ppConfig.getKmerSize()); } if (!result) { LOG.error("job failed at round " + round + " of " + inputFiles.length); job_result = false; break; } } // report if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) { Report report = new Report(); report.addJob(jobs); report.writeTo(ppConfig.getReportPath()); } return job_result ? 0 : 1; }
From source file:mapreduce.wordcount.WordCountSimple.java
License:Apache License
/**
 * Entry point: configures the "word count" job, reading from {@code args[0]}
 * and writing to {@code args[1]}, then exits with 0 on success or 1 on failure.
 *
 * @param args input path followed by output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    Job wordCountJob = Job.getInstance(configuration, "word count");
    wordCountJob.setJarByClass(WordCountSimple.class);

    // The summing reducer also serves as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(wordCountJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(wordCountJob, new Path(args[1]));

    boolean succeeded = wordCountJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:mapReduceBasics.WordCount.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser GOP = new GenericOptionsParser(conf, args); Configuration newConf = GOP.getConfiguration(); String[] otherArgs = GOP.getRemainingArgs(); System.err.println("Best of Luck"); Job job = new Job(newConf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); Path inputPath = new Path(otherArgs[0]); Path outputPath = new Path(otherArgs[1]); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); FileSystem fs = FileSystem.get(conf); fs.delete(outputPath, true);//from w w w . ja v a 2s.co m // job.setNumReduceTasks(3); System.err.println("Input Format - " + job.getInputFormatClass()); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:maxint.maxint.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: max integer <in> <out>"); System.exit(2);//from w w w . j av a 2s . c o m } Job job = new Job(conf, "max integer"); job.setJarByClass(maxint.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(MaxIntReducer.class); job.setReducerClass(MaxIntReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:merge.MergeSameData.java
License:Open Source License
/**
 * Entry point: runs the "Statistics Total Data Num" job over the given input
 * and output paths. Exits with 2 unless exactly two non-option arguments are
 * supplied, otherwise with 0 on success or 1 on failure.
 *
 * @param args generic Hadoop options followed by input path and output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    String[] otherArgs = optionsParser.getRemainingArgs();

    boolean hasInputAndOutput = otherArgs.length == 2;
    if (!hasInputAndOutput) {
        System.err.println("Usage: StatisticsTotalDataNum <in> <out>");
        System.exit(2);
    }

    Job mergeJob = new Job(configuration, "Statistics Total Data Num");
    mergeJob.setJarByClass(MergeSameData.class);

    // The summing reducer also serves as the combiner.
    mergeJob.setMapperClass(TokenizerMapper.class);
    mergeJob.setCombinerClass(IntSumReducer.class);
    mergeJob.setReducerClass(IntSumReducer.class);

    mergeJob.setOutputKeyClass(Text.class);
    mergeJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(mergeJob, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(mergeJob, new Path(otherArgs[1]));

    int exitStatus = mergeJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitStatus);
}
From source file:muming.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage Of Muming: wordcount <in> [<in>...] <out>"); System.exit(2);//from www. j a va 2 s .co m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:mvm.rya.accumulo.mr.eval.AccumuloRdfCountTool.java
License:Apache License
/** * cloudbase props// w w w. j a v a 2 s.c om */ @Override public int run(String[] strings) throws Exception { conf.set(MRUtils.JOB_NAME_PROP, "Gather Evaluation Statistics"); //initialize init(); Job job = new Job(conf); job.setJarByClass(AccumuloRdfCountTool.class); setupInputFormat(job); AccumuloInputFormat.setRanges(job, Lists.newArrayList(new Range(new Text(new byte[] {}), new Text(new byte[] { Byte.MAX_VALUE })))); // set input output of the particular job job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Mutation.class); // set mapper and reducer classes job.setMapperClass(CountPiecesMapper.class); job.setCombinerClass(CountPiecesCombiner.class); job.setReducerClass(CountPiecesReducer.class); String outputTable = tablePrefix + RdfCloudTripleStoreConstants.TBL_EVAL_SUFFIX; setupOutputFormat(job, outputTable); // Submit the job Date startTime = new Date(); System.out.println("Job started: " + startTime); int exitCode = job.waitForCompletion(true) ? 0 : 1; if (exitCode == 0) { Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; } else { System.out.println("Job Failed!!!"); } return -1; }