List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:Assignment2_P5_IPAddressCount.IPAddressDriver.java
/** * @param args the command line arguments */// w w w.j a va 2 s .com public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address Count"); job.setJarByClass(IPAddressDriver.class); job.setMapperClass(IPAddress_Mapper.class); job.setCombinerClass(IPAddress_Reducer.class); job.setReducerClass(IPAddress_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment3_P2_MergeStockAverageCount.StockPriceMergeDriver.java
/** * @param args the command line arguments *///w ww.j a va 2 s .c om public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); // local file system handle FileSystem local = FileSystem.getLocal(conf); // hdfs file system handle FileSystem hdfs = FileSystem.get(conf); // local input directory Path inputDir = new Path(args[0]); // hdfs i/p directory Path inputDir1 = new Path(args[1]); // local input files in local dir FileStatus[] inputFiles = local.listStatus(inputDir); // o/p stream FSDataOutputStream out = hdfs.create(inputDir1); // open each file and extract contents of file for (int i = 0; i < inputFiles.length; i++) { System.out.println("File name ----------------------------------------------------------------> " + inputFiles[i].getPath().getName()); FSDataInputStream in = local.open(inputFiles[i].getPath()); byte buffer[] = new byte[256]; int bytesRead = 0; // extract all contents of file while ((bytesRead = in.read(buffer)) > 0) { out.write(buffer, 0, bytesRead); } // close input stream in.close(); } Job job = Job.getInstance(conf, "Average Stock Price"); job.setJarByClass(StockPriceMergeDriver.class); job.setMapperClass(StockPriceMerge_Mapper.class); job.setCombinerClass(StockPriceMerge_Reducer.class); job.setReducerClass(StockPriceMerge_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); FileInputFormat.addInputPath(job, new Path(args[1])); // above programs output will be input for mapper FileOutputFormat.setOutputPath(job, new Path(args[2])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment3_P5_Top25Movies.Top25MovieRatingDriver.java
/** * @param args the command line arguments *///from www .j a va 2s.c o m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job1 = Job.getInstance(conf, "Movie Rating Count"); job1.setJarByClass(Top25MovieRatingDriver.class); // the usual - get basic mapred ready job1.setMapperClass(Top25MovieRating_Mapper.class); job1.setCombinerClass(Top25MovieRating_Reducer.class); job1.setReducerClass(Top25MovieRating_Reducer.class); // this will basically out -> movieId, average rating job1.setOutputKeyClass(IntWritable.class); job1.setOutputValueClass(FloatWritable.class); FileInputFormat.addInputPath(job1, new Path(args[0])); FileOutputFormat.setOutputPath(job1, new Path(args[1])); boolean complete = job1.waitForCompletion(true); // here's where we sort Configuration conf2 = new Configuration(); Job job2 = Job.getInstance(conf2, "Movie Rating Count"); if (complete) { job2.setJarByClass(Top25MovieRatingDriver.class); // namesake fellow, take it and go types - mostly useless job2.setMapperClass(Top25MovieRating_Mapper1.class); job2.setMapOutputKeyClass(FloatWritable.class); job2.setMapOutputValueClass(IntWritable.class); // this is where we would ideally sort descendingly job2.setSortComparatorClass(Top25MovieRating_SortComparator.class); // o/p top 25, man job2.setNumReduceTasks(1); job2.setReducerClass(Top25MovieRating_Reducer1.class); job2.setOutputKeyClass(FloatWritable.class); job2.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job2, new Path(args[1])); FileOutputFormat.setOutputPath(job2, new Path(args[2])); System.exit(job2.waitForCompletion(true) ? 0 : 1); } }
From source file:Assignment4_P2_StockAverageWithCombiner.StockAverageDriver.java
/** * @param args the command line arguments *///from w w w . j a v a 2 s.com public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Average Stock Price"); job.setJarByClass(StockAverageDriver.class); job.setMapperClass(StockAverage_Mapper.class); job.setCombinerClass(StockAverage_Combiner.class); job.setReducerClass(StockAverage_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(StockAverage_CompositeValueWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment4_P4_MemoryConscious.MovieRatingMemConsciousDriver.java
/** * @param args the command line arguments *///from ww w . j av a2s .co m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Movie Rating Mem Conscious Standard Deviation"); job.setJarByClass(MovieRatingMemConsciousDriver.class); job.setMapperClass(MovieRatingMemConscious_Mapper.class); job.setCombinerClass(MovingRatingMemConscious_Combiner.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(SortedMapWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setReducerClass(MovieRatingMemConscious_Reducer.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment5_P2_DistinctIPAddress.DistinctIPAddressDriver.java
/** * @param args the command line arguments *//*from w w w .j av a 2s. c om*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address Count"); job.setJarByClass(DistinctIPAddressDriver.class); job.setMapperClass(DistinctIPAddress_Mapper.class); job.setCombinerClass(DistinctIPAddress_Reducer.class); job.setReducerClass(DistinctIPAddress_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:averagerating_youtube.AverageRating_Youtube.java
/** * @param args the command line arguments *//*from ww w. j a va 2s . c o m*/ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJobName("AverageRating_Youtube"); job.setJarByClass(AverageRating_Youtube.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(AvgRating_CommCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(AverageRating_CommentCountTuple.class); job.setCombinerClass(AvgRating_CommCountCombiner.class); job.setReducerClass(AvgRating_CommCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(AverageRating_CommentCountTuple.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
From source file:Business.MapReduceOne.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "FirstJob"); job.setJarByClass(MapReduceOne.class); final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath()); String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/"; String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne"; //use the arguments instead if provided. if (args.length > 1) { inFiles = args[1];/*from ww w. jav a 2 s .c om*/ outFiles = args[2]; } Path in = new Path(inFiles); Path out = new Path(outFiles); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setMapperClass(Mapper1.class); job.setCombinerClass(Reducer1.class); job.setReducerClass(Reducer1.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); System.exit(job.waitForCompletion(true) ? 0 : 1); return 0; }
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) { TABLE_NAME = args[1];/*from w w w .j a v a 2 s. co m*/ Job job = null; try { job = new Job(new Configuration(), NAME); job.setJarByClass(HexastoreBulkImport.class); job.setMapperClass(sampler.TotalOrderPrep.Map.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Combiner.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(ImmutableBytesWritable.class); job.setPartitionerClass(TotalOrderPartitioner.class); //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000")); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000")); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(HFileOutputFormat.class); Path out = new Path("out"); FileOutputFormat.setOutputPath(job, out); Configuration conf = new Configuration(); FileSystem fs; try { fs = FileSystem.get(conf); if (fs.exists(out)) { fs.delete(out, true); } } catch (IOException e) { e.printStackTrace(); } HBaseAdmin hadmin = new HBaseAdmin(conf); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats"); HColumnDescriptor family = new HColumnDescriptor("size"); desc.addFamily(family); conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME + "_stats")) { //hadmin.disableTable(TABLE_NAME+"_stats"); //hadmin.deleteTable(TABLE_NAME+"_stats"); } else { hadmin.createTable(desc); } FileInputFormat.setInputPaths(job, new Path(args[0])); //job.getConfiguration().setInt("mapred.map.tasks", 18); job.getConfiguration().set("h2rdf.tableName", TABLE_NAME); job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().setInt("io.sort.mb", 100); job.getConfiguration().setInt("io.file.buffer.size", 131072); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864); job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432); job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5); job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5); //job.getConfiguration().setInt("io.sort.mb", 100); } catch (IOException e2) { e2.printStackTrace(); } return job; }
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java
License:Apache License
/** * Runs this tool./*from www . j av a 2 s . com*/ */ @Override public int run(String[] argv) throws Exception { final Args args = new Args(); CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100)); try { parser.parseArgument(argv); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); return -1; } LOG.info("Tool: " + PerfectX.class.getSimpleName()); LOG.info(" - input path: " + args.input); LOG.info(" - output path: " + args.output); LOG.info(" - number of reducers: " + args.numReducers); LOG.info(" - use in-mapper combining: " + args.imc); Configuration conf = getConf(); Job job = Job.getInstance(conf); job.setJobName(PerfectX.class.getSimpleName()); job.setJarByClass(PerfectX.class); job.setNumReduceTasks(args.numReducers); FileInputFormat.setInputPaths(job, new Path(args.input)); FileOutputFormat.setOutputPath(job, new Path(args.output)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(args.output); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }