List of usage examples for org.apache.hadoop.mapred.JobConf.setOutputValueClass
public void setOutputValueClass(Class<?> theClass)
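setOutputValueClass declares the value type of the job's final (reduce-side) output records; it must match what the reducer actually emits and what the configured OutputFormat can write. Below is a minimal, self-contained sketch of wiring setOutputValueClass into a JobConf-driven job, assuming a hypothetical word-count-style reducer that emits Text keys and IntWritable values; the class names OutputValueClassExample, MyMapper, and MyReducer and the path arguments are illustrative placeholders, not taken from the examples that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class OutputValueClassExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(OutputValueClassExample.class);
        conf.setJobName("output-value-class-example");

        // Final output types: the reducer is assumed to emit (Text, IntWritable) pairs.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Hypothetical mapper/reducer classes would be registered here:
        // conf.setMapperClass(MyMapper.class);
        // conf.setReducerClass(MyReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

If the map output types differ from the final output types, setMapOutputKeyClass and setMapOutputValueClass must be set separately, as the SortWebGraph example below does.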
From source file:edu.umd.cloud9.webgraph.driver.SortWebGraph.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), SortWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    String inputPath = args[0];
    String outputPath = args[1];
    int numberOfDocuments = Integer.parseInt(args[2]);
    int numMappers = 1;
    int numReducers = Integer.parseInt(args[3]);

    conf.setJobName("SortWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    if (numberOfDocuments == 0) {
        numberOfDocuments = DEFAULT_NUMBER_OF_DOCUMENTS;
    }
    conf.setInt("Cloud9.NumberOfDocuments", numberOfDocuments);

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    conf.setMapperClass(IdentityMapper.class);
    conf.setPartitionerClass(Partition.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("SortAnchorText");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of documents: "
            + conf.getInt("Cloud9.NumberOfDocuments", DEFAULT_NUMBER_OF_DOCUMENTS));

    fs.delete(new Path(outputPath));
    JobClient.runJob(conf);
    return 0;
}
From source file:edu.umd.cloud9.webgraph.ExtractLinks.java
License:Apache License
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), ExtractLinks.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);
    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    if (!fs.exists(new Path(mappingFile)))
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");

    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    conf.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }
    return 0;
}
From source file:edu.umn.cs.spatialHadoop.operations.ConvexHull.java
License:Open Source License
public static void convexHullMapReduce(Path inFile, Path userOutPath, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, ConvexHull.class);
    Path outPath = userOutPath;
    FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job);
    Shape shape = params.getShape("shape");

    if (outPath == null) {
        do {
            outPath = new Path(inFile.toUri().getPath() + ".convex_hull_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outPath));
    } else {
        if (outFs.exists(outPath)) {
            if (params.getBoolean("overwrite", false)) {
                outFs.delete(outPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("ConvexHull");
    job.setClass(SpatialSite.FilterClass, ConvexHullFilter.class, BlockFilter.class);
    job.setMapperClass(IdentityMapper.class);
    job.setCombinerClass(ConvexHullReducer.class);
    job.setReducerClass(ConvexHullReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(shape.getClass());
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);
    job.setOutputFormat(GridOutputFormat2.class);
    GridOutputFormat2.setOutputPath(job, outPath);

    JobClient.runJob(job);

    // If the output path was not set by the user, automatically delete it
    if (userOutPath == null)
        outFs.delete(outPath, true);
}
From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java
License:Open Source License
private static void skylineMapReduce(Path inFile, Path userOutPath, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, Skyline.class);
    Path outPath = userOutPath;
    FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job);
    Shape shape = params.getShape("shape");

    if (outPath == null) {
        do {
            outPath = new Path(inFile.toUri().getPath() + ".skyline_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outPath));
    }

    job.setJobName("Skyline");
    job.setClass(SpatialSite.FilterClass, SkylineFilter.class, BlockFilter.class);
    job.setMapperClass(IdentityMapper.class);
    job.setCombinerClass(SkylineReducer.class);
    job.setReducerClass(SkylineReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(shape.getClass());
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    JobClient.runJob(job);

    // If the output path was not set by the user, automatically delete it
    if (userOutPath == null)
        outFs.delete(outPath, true);
}
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/**
 * @param inputPath
 * @param outputPath
 * @param params
 * @return
 * @throws IOException
 * @throws Exception
 * @throws InterruptedException
 */
private static long stJoin(Path inputPath, Path outputPath, OperationsParams params)
        throws IOException, Exception, InterruptedException {
    JobConf conf = new JobConf(new Configuration(), STJoin.class);
    FileSystem outfs = outputPath.getFileSystem(conf);
    outfs.delete(outputPath, true);

    conf.setJobName("STJoin");
    // pass params to the join map-reduce
    conf.set("timedistance", params.get("timedistance"));
    conf.set("spacedistance", params.get("spacedistance"));
    // conf.setMapOutputKeyClass(LongWritable.class);
    // conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    // Mapper settings
    conf.setMapperClass(STJoinMap.class);
    // conf.setReducerClass(STJoinReduce.class);
    // conf.setCombinerClass(STJoinReduce.class);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setNumReduceTasks(0);

    JobClient.runJob(conf).waitForCompletion();

    outfs = inputPath.getFileSystem(conf);
    outfs.delete(inputPath);
    return 0;
}
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_db_large");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue " + "FROM UserVisits GROUP BY sourceIP;");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeHDFS.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("aggregation_hdfs_large");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(AggTaskLargeHDFS.Map.class);
    conf.setCombinerClass(AggTaskLargeHDFS.Reduce.class);
    conf.setReducerClass(AggTaskLargeHDFS.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_db_small");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT SUBSTRING(sourceIP, 1, 7) AS subSourceIP, SUM(adRevenue) AS sumAdRevenue FROM UserVisits GROUP BY subSourceIP;");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallHDFS.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_hdfs_small");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(AggTaskSmallHDFS.Map.class);
    conf.setCombinerClass(AggTaskSmallHDFS.Reduce.class);
    conf.setReducerClass(AggTaskSmallHDFS.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(GrepTaskDB.class);
    conf.setJobName("grep_db_job");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    // GREP arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-pattern".equals(args[i]))
            conf.set("pattern", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    System.out.println(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "grep");
    conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';");

    return conf;
}