List of usage examples for org.apache.hadoop.mapred.JobConf.setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
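The method registers a combiner that runs on the map side to pre-aggregate map output before the shuffle. As a point of reference before the collected examples, here is a minimal sketch of the typical wiring in the old mapred API; the driver and mapper names are illustrative rather than taken from the examples below, and TokenizerMapper is a hypothetical mapper assumed to emit (word, 1) pairs.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LongSumReducer;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCountDriver.class);
        conf.setJobName("wordcount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(LongWritable.class);
        conf.setMapperClass(TokenizerMapper.class); // hypothetical mapper emitting (word, 1)
        // Summing longs is associative and commutative, so the same class
        // can safely serve as both combiner and reducer.
        conf.setCombinerClass(LongSumReducer.class);
        conf.setReducerClass(LongSumReducer.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}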
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Create a JobConf for a Job that will calculate the number of unique listeners per track.
 *
 * @param inputDir The path to the folder containing the raw listening data files.
 * @return The unique listeners JobConf.
 */
private JobConf getUniqueListenersJobConf(Path inputDir) {
    log.info("Creating configuration for unique listeners Job");

    // Output results to a temporary intermediate folder; this will get deleted by the start() method.
    Path uniqueListenersOutput = new Path("uniqueListeners");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(IntWritable.class); // number of unique listeners
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(UniqueListenersMapper.class);
    conf.setCombinerClass(UniqueListenersCombiner.class);
    conf.setReducerClass(UniqueListenersReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, uniqueListenersOutput);
    conf.setJobName("uniqueListeners");
    return conf;
}
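Note that the combiner above is a separate class from the reducer. Counting distinct listeners is not associative over partial counts, so a combiner for this job cannot emit counts; the most it can safely do is deduplicate user IDs within a single map task's output and pass the IDs through, leaving the final distinct count to the reducer. The actual UniqueListenersCombiner is not shown on this page, so the following is only a sketch of that shape, assuming the map output is (track id, user id) pairs:

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical sketch of a dedupe-only combiner for the unique listeners job.
public class UniqueListenersCombiner extends MapReduceBase
        implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    public void reduce(IntWritable trackId, Iterator<IntWritable> userIds,
            OutputCollector<IntWritable, IntWritable> output, Reporter reporter) throws IOException {
        // Forward each user id at most once per track; the reducer still
        // counts distinct ids across the outputs of all map tasks.
        Set<Integer> seen = new HashSet<Integer>();
        while (userIds.hasNext()) {
            IntWritable userId = userIds.next();
            if (seen.add(userId.get())) {
                output.collect(trackId, userId);
            }
        }
    }
}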
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Creates a JobConf for a Job that will sum up the TrackStatistics per track.
 *
 * @param inputDir The path to the folder containing the raw input data files.
 * @return The sum JobConf.
 */
private JobConf getSumJobConf(Path inputDir) {
    log.info("Creating configuration for sum job");

    // Output results to a temporary intermediate folder; this will get deleted by the start() method.
    Path playsOutput = new Path("sum");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(TrackStats.class); // statistics for a track
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(SumMapper.class);
    conf.setCombinerClass(SumReducer.class);
    conf.setReducerClass(SumReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, playsOutput);
    conf.setJobName("sum");
    return conf;
}
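Here, by contrast, the reducer class itself is registered as the combiner. That is safe only when the reduce operation is associative and commutative and its input key/value types match its output types, as they do when summing per-track statistics. TrackStats internals are not shown on this page, so the accessors below are assumptions; this is just a sketch of the shape such a reducer would need:

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical sketch; assumes TrackStats exposes simple getters/setters.
public class SumReducer extends MapReduceBase
        implements Reducer<IntWritable, TrackStats, IntWritable, TrackStats> {
    @Override
    public void reduce(IntWritable trackId, Iterator<TrackStats> values,
            OutputCollector<IntWritable, TrackStats> output, Reporter reporter) throws IOException {
        // Field-wise addition is associative and commutative, so partial
        // sums produced by the combiner merge correctly in the reducer.
        TrackStats sum = new TrackStats(); // assumed zero-initialized
        while (values.hasNext()) {
            TrackStats stats = values.next();
            sum.setListeners(sum.getListeners() + stats.getListeners()); // assumed accessors
            sum.setPlays(sum.getPlays() + stats.getPlays()); // assumed accessors
        }
        output.collect(trackId, sum);
    }
}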
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Creates a JobConf for a Job that will merge the unique listeners and track statistics.
 *
 * @param outputPath The path for the results to be output to.
 * @param sumInputDir The path containing the data from the sum Job.
 * @param listenersInputDir The path containing the data from the unique listeners job.
 * @return The merge JobConf.
 */
private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) {
    log.info("Creating configuration for merge job");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(TrackStats.class); // overall track statistics
    conf.setCombinerClass(SumReducer.class); // safe to re-use reducer as a combiner here
    conf.setReducerClass(SumReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(conf, outputPath);

    // No setMapperClass call is needed: MultipleInputs binds a mapper to each input path.
    MultipleInputs.addInputPath(conf, sumInputDir, SequenceFileInputFormat.class, IdentityMapper.class);
    MultipleInputs.addInputPath(conf, listenersInputDir, SequenceFileInputFormat.class,
            MergeListenersMapper.class);

    conf.setJobName("merge");
    return conf;
}
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob4.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob4.class);
    conf.setJobName(GoogleSyntacticsJob4.class.getSimpleName());

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    conf.setMapperClass(GoogleSyntacticsJob4Mapper.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(NullWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setNumReduceTasks(0);
    // Note: with zero reduce tasks this is a map-only job, so the combiner below is never invoked.
    conf.setCombinerClass(IdentityReducer.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:de.tudarmstadt.lt.nlkg.ConvertInvertSVO.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertInvertSVO.class);
    conf.setJobName(ConvertInvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
From source file:de.tudarmstadt.lt.nlkg.ConvertSVO.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertSVO.class);
    conf.setJobName(ConvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
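In this example and the previous one, IdentityReducer (from org.apache.hadoop.mapred.lib) serves as both combiner and reducer, so the combine step neither aggregates nor drops anything: for every key it simply forwards each value unchanged. Its logic is equivalent to the following sketch:

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// A pass-through reducer equivalent to org.apache.hadoop.mapred.lib.IdentityReducer.
public class PassThroughReducer<K, V> extends MapReduceBase implements Reducer<K, V, K, V> {
    @Override
    public void reduce(K key, Iterator<V> values,
            OutputCollector<K, V> output, Reporter reporter) throws IOException {
        while (values.hasNext()) {
            output.collect(key, values.next()); // forward every value unchanged
        }
    }
}

Used as a combiner, such a class is effectively a no-op apart from extra (de)serialization cost, which is why a combiner is normally only worth setting when it actually shrinks the map output.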
From source file:dinocode.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");

    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class);

    //conf.setInputPath(new Path("graph1"));
    //conf.setOutputPath(new Path("graph2"));

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000";
    for (int i = 0; i < 500; i++) {
        client = new JobClient();
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        int count = i + 1;
        outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count;

        conf.setNumReduceTasks(5);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));

        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        inFiles = outFiles;
    }

    // Viewer
    client = new JobClient();
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");

    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000";
    outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark2.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
    JobConf job = base.getJobConf();

    job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    //job.setInputFormat(KeyValueSetInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    if (base.getTupleData()) {
        job.setMapperClass(Benchmark2.TupleWritableMap.class);
    } else {
        job.setMapperClass(Benchmark2.TextMap.class);
    }
    job.setCombinerClass(Benchmark2.Reduce.class);
    job.setReducerClass(Benchmark2.Reduce.class);
    //job.setNumReduceTasks(0);

    try {
        base.runJob(job);
        if (base.getCombine())
            base.runCombine();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark4.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
    JobConf job = base.getJobConf();

    job.setInputFormat(TextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(Benchmark4.Map.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    try {
        job.setCompressMapOutput(base.getCompress());
        base.runJob(job);
        if (base.getCombine())
            base.runCombine();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}
From source file:edu.ldzm.analysis.AnalysisSummary.java
License:Apache License
/**
 * The main driver for the word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summery");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (!param) {
        System.out.println("-l namelist.txt");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
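Since run(String[]) matches the Tool interface, this driver is presumably launched through ToolRunner. A hypothetical launcher and invocation might look like the following (the field-list separator for -l is assumed to be a comma, i.e. whatever SEPARATOR is defined as in the source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class AnalysisSummaryLauncher {
    public static void main(String[] args) throws Exception {
        // e.g. args = { "-m", "20", "-r", "4",
        //               "-l", "timeStamp,elapsed,label,success,bytes",
        //               "input", "output" }
        int exitCode = ToolRunner.run(new Configuration(), new AnalysisSummary(), args);
        System.exit(exitCode);
    }
}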