List of usage examples for org.apache.hadoop.mapred.JobConf.setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:edu.umn.cs.spatialHadoop.operations.ConvexHull.java
License:Open Source License
public static void convexHullMapReduce(Path inFile, Path userOutPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params, ConvexHull.class); Path outPath = userOutPath;/*from w ww . j a v a 2 s. c o m*/ FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job); Shape shape = params.getShape("shape"); if (outPath == null) { do { outPath = new Path(inFile.toUri().getPath() + ".convex_hull_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outPath)); } else { if (outFs.exists(outPath)) { if (params.getBoolean("overwrite", false)) { outFs.delete(outPath, true); } else { throw new RuntimeException("Output path already exists and -overwrite flag is not set"); } } } job.setJobName("ConvexHull"); job.setClass(SpatialSite.FilterClass, ConvexHullFilter.class, BlockFilter.class); job.setMapperClass(IdentityMapper.class); job.setCombinerClass(ConvexHullReducer.class); job.setReducerClass(ConvexHullReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(shape.getClass()); job.setInputFormat(ShapeInputFormat.class); ShapeInputFormat.addInputPath(job, inFile); job.setOutputFormat(GridOutputFormat2.class); GridOutputFormat2.setOutputPath(job, outPath); JobClient.runJob(job); // If outputPath not set by user, automatically delete it if (userOutPath == null) outFs.delete(outPath, true); }
From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java
License:Open Source License
private static void skylineMapReduce(Path inFile, Path userOutPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params, Skyline.class); Path outPath = userOutPath;//from www . j a va 2 s. com FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job); Shape shape = params.getShape("shape"); if (outPath == null) { do { outPath = new Path(inFile.toUri().getPath() + ".skyline_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outPath)); } job.setJobName("Skyline"); job.setClass(SpatialSite.FilterClass, SkylineFilter.class, BlockFilter.class); job.setMapperClass(IdentityMapper.class); job.setCombinerClass(SkylineReducer.class); job.setReducerClass(SkylineReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(shape.getClass()); job.setInputFormat(ShapeIterInputFormat.class); ShapeInputFormat.addInputPath(job, inFile); job.setOutputFormat(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outPath); JobClient.runJob(job); // If outputPath not set by user, automatically delete it if (userOutPath == null) outFs.delete(outPath, true); }
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/** * /*from w w w. j av a 2s. com*/ * @param inputPath * @param outputPath * @param params * @return * @throws IOException * @throws Exception * @throws InterruptedException */ private static long stJoin(Path inputPath, Path outputPath, OperationsParams params) throws IOException, Exception, InterruptedException { JobConf conf = new JobConf(new Configuration(), STJoin.class); FileSystem outfs = outputPath.getFileSystem(conf); outfs.delete(outputPath, true); conf.setJobName("STJoin"); // pass params to the join map-reduce conf.set("timedistance", params.get("timedistance")); conf.set("spacedistance", params.get("spacedistance")); // conf.setMapOutputKeyClass(LongWritable.class); // conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); // Mapper settings conf.setMapperClass(STJoinMap.class); // conf.setReducerClass(STJoinReduce.class); // conf.setCombinerClass(STJoinReduce.class); conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setNumReduceTasks(0); JobClient.runJob(conf).waitForCompletion(); outfs = inputPath.getFileSystem(conf); outfs.delete(inputPath); return 0; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_large"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }// ww w . j a v a 2 s .c o m // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue " + "FROM UserVisits GROUP BY sourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("aggregation_hdfs_large"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(AggTaskLargeHDFS.Map.class); conf.setCombinerClass(AggTaskLargeHDFS.Reduce.class); conf.setReducerClass(AggTaskLargeHDFS.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from ww w . j a v a 2 s . com*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/* w w w . j av a 2 s. c om*/ // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT SUBSTRING(sourceIP, 1, 7) AS subSourceIP, SUM(adRevenue) AS sumAdRevenue FROM UserVisits GROUP BY subSourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_hdfs_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(AggTaskSmallHDFS.Map.class); conf.setCombinerClass(AggTaskSmallHDFS.Reduce.class); conf.setReducerClass(AggTaskSmallHDFS.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }//from ww w. j a v a 2s. c om FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(GrepTaskDB.class); conf.setJobName("grep_db_job"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);/*www . j a v a2 s .c o m*/ // GREP arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-pattern".equals(args[i])) conf.set("pattern", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); } // OUTPUT properties Path outputPath = new Path(conf.get("output")); System.out.println(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); // DB properties conf.set(DBConst.DB_RELATION_ID, "grep"); conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java
License:Apache License
/**
 * Builds the job configuration for the grep task over HDFS text input (map-only job).
 *
 * <p>{@code args[0]} is the grep pattern, {@code args[1]} the input path and {@code args[2]}
 * the output path; {@code HDFSUtil.deletePath} is invoked on the output path before it is
 * registered (presumably to clear results of a previous run).
 *
 * @param args command-line arguments: pattern, input path, output path
 * @return the configured job
 * @throws IOException declared by the overridden method
 * @throws RuntimeException if fewer than three arguments are supplied
 */
@Override
protected JobConf configureJob(String... args) throws IOException {
    final JobConf job = new JobConf(getConf(), this.getClass());
    job.setJobName("grep_hdfs");

    // Mapper only: zero reduce tasks.
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    // Emitted types and the input/output formats.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    final boolean hasAllArgs = args.length >= 3;
    if (!hasAllArgs) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    final String pattern = args[0];
    final Path source = new Path(args[1]);
    job.set(GREP_PATTERN_PARAM, pattern);
    FileInputFormat.setInputPaths(job, source);

    final Path target = new Path(args[2]);
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    return job;
}
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(JoinTaskDB.class); conf.setJobName("join_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setNumReduceTasks(1); // Because we look for 1 TOP value // join arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-date_l".equals(args[i])) conf.set("date_l", args[++i]); else if ("-date_u".equals(args[i])) conf.set("date_u", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); }//w ww .ja va 2s. c o m // OUTPUT properties Path outputPath = new Path(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName()); String TABLE_R = "Rankings"; String TABLE_UV = "UserVisits"; conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue " + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL " + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' " + "GROUP BY UV.sourceIP;"); return conf; }