List of usage examples for org.apache.hadoop.mapred JobConf setMapperClass
public void setMapperClass(Class<? extends Mapper> theClass)
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_hdfs_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(AggTaskSmallHDFS.Map.class); conf.setCombinerClass(AggTaskSmallHDFS.Reduce.class); conf.setReducerClass(AggTaskSmallHDFS.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from w w w .j a va 2 s . c om*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(GrepTaskDB.class); conf.setJobName("grep_db_job"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//w w w . ja v a2s. c o m // GREP arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-pattern".equals(args[i])) conf.set("pattern", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); } // OUTPUT properties Path outputPath = new Path(conf.get("output")); System.out.println(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); // DB properties conf.set(DBConst.DB_RELATION_ID, "grep"); conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("grep_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);// w w w . j ava 2 s.c o m conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 3) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); } conf.set(GREP_PATTERN_PARAM, args[0]); FileInputFormat.setInputPaths(conf, new Path(args[1])); Path outputPath = new Path(args[2]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(JoinTaskDB.class); conf.setJobName("join_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setNumReduceTasks(1); // Because we look for 1 TOP value // join arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-date_l".equals(args[i])) conf.set("date_l", args[++i]); else if ("-date_u".equals(args[i])) conf.set("date_u", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); }/*w ww. j a v a2 s . c o m*/ // OUTPUT properties Path outputPath = new Path(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName()); String TABLE_R = "Rankings"; String TABLE_UV = "UserVisits"; conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue " + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL " + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' " + "GROUP BY UV.sourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java
License:Apache License
public int run(String[] args) throws Exception { long startTime = System.currentTimeMillis(); if (args.length < 5) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/* w w w. j av a 2 s .c o m*/ String dateFrom = args[0]; String dateTo = args[1]; String rankingsInputDir = args[2]; String userVisitsInputDir = args[3]; String outputDir = args[4]; // output path (delete) Path outputPath = new Path(outputDir); HDFSUtil.deletePath(outputPath); // phase 1 JobConf conf1 = new JobConf(this.getClass()); conf1.setJobName("join_hdfs_phase1"); Path p1Output = new Path(outputDir + "/phase1"); FileOutputFormat.setOutputPath(conf1, p1Output); conf1.setInputFormat(TextInputFormat.class); conf1.setOutputFormat(TextOutputFormat.class); conf1.setOutputKeyClass(Text.class); conf1.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir)); conf1.set(DATE_FROM_PARAM, dateFrom); conf1.set(DATE_TO_PARAM, dateTo); conf1.setMapperClass(Phase1Map.class); conf1.setReducerClass(Phase1Reduce.class); // conf1.setPartitionerClass(theClass) RunningJob job1 = JobClient.runJob(conf1); if (job1.isSuccessful()) { // phase 2 JobConf conf2 = new JobConf(this.getClass()); conf2.setJobName("join_hdfs_phase2"); conf2.setInputFormat(KeyValueTextInputFormat.class); conf2.setOutputFormat(TextOutputFormat.class); conf2.setOutputKeyClass(Text.class); conf2.setOutputValueClass(Text.class); conf2.setMapperClass(IdentityMapper.class); conf2.setReducerClass(Phase2Reduce.class); Path p2Output = new Path(outputDir + "/phase2"); FileOutputFormat.setOutputPath(conf2, p2Output); FileInputFormat.setInputPaths(conf2, p1Output); RunningJob job2 = JobClient.runJob(conf2); if (job2.isSuccessful()) { // phase 3 JobConf conf3 = new JobConf(this.getClass()); conf3.setJobName("join_hdfs_phase3"); conf3.setNumReduceTasks(1); conf3.setInputFormat(KeyValueTextInputFormat.class); conf3.setOutputKeyClass(Text.class); conf3.setOutputValueClass(Text.class); conf3.setMapperClass(IdentityMapper.class); conf3.setReducerClass(Phase3Reduce.class); Path p3Output = new Path(outputDir + "/phase3"); FileOutputFormat.setOutputPath(conf3, p3Output); FileInputFormat.setInputPaths(conf3, p2Output); RunningJob job3 = JobClient.runJob(conf3); if (!job3.isSuccessful()) { System.out.println("PHASE 3 FAILED!!!"); } } else { System.out.println("PHASE 2 FAILED!!!"); } } else { System.out.println("PHASE 1 FAILED!!!"); } long endTime = System.currentTimeMillis(); System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n"); return 0; }
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("selection_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w w w.j ava 2 s . c o m if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); } conf.set(PAGE_RANK_VALUE_PARAM, args[0]); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "Rankings"); conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > " + conf.get(PAGE_RANK_VALUE_PARAM) + ";"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("selection_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);/*w w w. j a v a2s . c o m*/ conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 3) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); } conf.set(PAGE_RANK_VALUE_PARAM, args[0]); FileInputFormat.setInputPaths(conf, new Path(args[1])); // OUTPUT properties Path outputPath = new Path(args[2]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("udf_agg_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); conf.setMapperClass(Query4Map.class); conf.setCombinerClass(LongSumReducer.class); conf.setReducerClass(LongSumReducer.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }//from ww w. j a v a 2s .c o m // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "Documents"); conf.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("udf_agg_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(LongSumReducer.class); conf.setReducerClass(LongSumReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/* w w w. ja v a 2s . c om*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("GlobalHasher"); conf.setMapOutputKeyClass(UnsortableInt.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(GlobalHasher.Map.class); conf.setReducerClass(GlobalHasher.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 5) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from ww w . ja v a 2 s. co m*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); int partNo = Integer.parseInt(args[2]); conf.setNumReduceTasks(partNo); conf.set(DELIMITER_PARAM, args[3]); int hashFieldPos = Integer.parseInt(args[4]); conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos); return conf; }