List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)

Sets the value of the name property.
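Before the per-project examples below, a minimal sketch of how set/get pair up on a JobConf; the property name "greeting.message" and the driver class name are hypothetical placeholders, not taken from any example on this page.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        // Create a job configuration; the class argument is used to locate the job jar.
        JobConf conf = new JobConf(JobConfSetSketch.class);

        // set(name, value) stores an arbitrary string property in the configuration;
        // mappers and reducers can read it back later with conf.get(name).
        conf.set("greeting.message", "hello");              // hypothetical custom property
        conf.set("fs.defaultFS", "hdfs://localhost:9000");  // standard Hadoop property

        // get(name) returns exactly what was set, or null if the property is absent.
        System.out.println(conf.get("greeting.message"));   // prints "hello"
    }
}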
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(GrepTaskDB.class);
    conf.setJobName("grep_db_job");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    // GREP arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-pattern".equals(args[i]))
            conf.set("pattern", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    System.out.println(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "grep");
    conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("grep_hdfs");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(GREP_PATTERN_PARAM, args[0]);
    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-date_l".equals(args[i]))
            conf.set("date_l", args[++i]);
        else if ("-date_u".equals(args[i]))
            conf.set("date_u", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    String TABLE_R = "Rankings";
    String TABLE_UV = "UserVisits";
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV "
                    + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java
License:Apache License
public int run(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    String dateFrom = args[0];
    String dateTo = args[1];
    String rankingsInputDir = args[2];
    String userVisitsInputDir = args[3];
    String outputDir = args[4];

    // output path (delete)
    Path outputPath = new Path(outputDir);
    HDFSUtil.deletePath(outputPath);

    // phase 1
    JobConf conf1 = new JobConf(this.getClass());
    conf1.setJobName("join_hdfs_phase1");
    Path p1Output = new Path(outputDir + "/phase1");
    FileOutputFormat.setOutputPath(conf1, p1Output);
    conf1.setInputFormat(TextInputFormat.class);
    conf1.setOutputFormat(TextOutputFormat.class);
    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir));
    conf1.set(DATE_FROM_PARAM, dateFrom);
    conf1.set(DATE_TO_PARAM, dateTo);
    conf1.setMapperClass(Phase1Map.class);
    conf1.setReducerClass(Phase1Reduce.class);
    // conf1.setPartitionerClass(theClass)

    RunningJob job1 = JobClient.runJob(conf1);

    if (job1.isSuccessful()) {
        // phase 2
        JobConf conf2 = new JobConf(this.getClass());
        conf2.setJobName("join_hdfs_phase2");
        conf2.setInputFormat(KeyValueTextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);
        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(Text.class);
        conf2.setMapperClass(IdentityMapper.class);
        conf2.setReducerClass(Phase2Reduce.class);
        Path p2Output = new Path(outputDir + "/phase2");
        FileOutputFormat.setOutputPath(conf2, p2Output);
        FileInputFormat.setInputPaths(conf2, p1Output);

        RunningJob job2 = JobClient.runJob(conf2);

        if (job2.isSuccessful()) {
            // phase 3
            JobConf conf3 = new JobConf(this.getClass());
            conf3.setJobName("join_hdfs_phase3");
            conf3.setNumReduceTasks(1);
            conf3.setInputFormat(KeyValueTextInputFormat.class);
            conf3.setOutputKeyClass(Text.class);
            conf3.setOutputValueClass(Text.class);
            conf3.setMapperClass(IdentityMapper.class);
            conf3.setReducerClass(Phase3Reduce.class);
            Path p3Output = new Path(outputDir + "/phase3");
            FileOutputFormat.setOutputPath(conf3, p3Output);
            FileInputFormat.setInputPaths(conf3, p2Output);

            RunningJob job3 = JobClient.runJob(conf3);
            if (!job3.isSuccessful()) {
                System.out.println("PHASE 3 FAILED!!!");
            }
        } else {
            System.out.println("PHASE 2 FAILED!!!");
        }
    } else {
        System.out.println("PHASE 1 FAILED!!!");
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n");
    return 0;
}
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("selection_db");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Rankings");
    conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings "
            + "WHERE pageRank > " + conf.get(PAGE_RANK_VALUE_PARAM) + ";");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("selection_hdfs");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);
    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    // OUTPUT properties
    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("udf_agg_db");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(Query4Map.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Documents");
    conf.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java
License:Apache License
/**
 * Job config initialization (command-line params etc).
 */
protected JobConf initConf(String[] args) throws Exception {

    List<String> other_args = new ArrayList<String>();
    Path configuration_file = null;
    boolean replication = false;

    for (int i = 0; i < args.length; ++i) {
        if (("-" + DBConst.DB_CONFIG_FILE).equals(args[i])) {
            configuration_file = new Path(args[++i]);
        } else if ("-replication".equals(args[i])) {
            replication = true;
        } else {
            other_args.add(args[i]);
        }
    }

    JobConf conf = null;
    conf = configureJob(other_args.toArray(new String[0]));

    LOG.info(conf.getJobName());
    LOG.info(conf.get(DBConst.DB_SQL_QUERY));

    if (conf.get(DBConst.DB_RELATION_ID) == null || conf.get(DBConst.DB_SQL_QUERY) == null
            || conf.get(DBConst.DB_RECORD_READER) == null) {
        throw new Exception(
                "ERROR: DB Job requires a relation, an SQL Query and a Record Reader class to be configured.\n"
                        + "Please specify using: conf.set(\"" + DBConst.DB_RELATION_ID
                        + "\", <relation name>), conf.set(\"" + DBConst.DB_SQL_QUERY + "\", <SQL QUERY>)\n"
                        + "and code an appropriate Record Reader and specify conf.set(\""
                        + DBConst.DB_RECORD_READER + "\", <Record reader class name>)\n");
    }

    if (replication) {
        conf.setBoolean(DBConst.DB_REPLICATION, true);
    }

    if (configuration_file == null) {
        if (conf.get(DBConst.DB_CONFIG_FILE) == null) {
            throw new Exception("No HadoopDB config file!");
        }
    } else {
        conf.set(DBConst.DB_CONFIG_FILE, configuration_file.toString());
    }

    setInputFormat(conf);

    return conf;
}
From source file:etl.cmd.test.XFsTestCase.java
License:Apache License
/**
 * Return a JobClient to the test JobTracker.
 *
 * @return a JobClient to the test JobTracker.
 * @throws HadoopAccessorException thrown if the JobClient could not be obtained.
 */
protected JobClient createJobClient() throws HadoopAccessorException {
    JobConf conf = has.createJobConf(getJobTrackerUri());
    conf.set("mapreduce.jobtracker.address", getJobTrackerUri());
    conf.set("fs.defaultFS", getNameNodeUri());
    return has.createJobClient(getTestUser(), conf);
}