List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
value
of the name
property. From source file:edu.umn.cs.spatialHadoop.operations.Intersects.java
License:Open Source License
public static <S extends Shape> long intersects(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Intersects.class); LOG.info("Intersects journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from w ww . j a v a2 s. c om outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Intersects"); job.setMapperClass(IntersectsMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(IntersectsReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Overlaps.java
License:Open Source License
public static <S extends Shape> long overlaps(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Overlaps.class); LOG.info("Overlaps journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from w w w . j av a 2 s . com outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Overlaps"); job.setMapperClass(OverlapMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(OverlapReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.PyramidPlot.java
License:Apache License
/** * Plot a file to a set of images in different zoom levels using a MapReduce * program.//from w w w . ja v a 2s. co m * @param <S> type of shapes stored in file * @param inFile - Path to the input file(s) * @param outFile - Path to the output file (image) * @param shape - A sample object to be used for parsing input file * @param tileWidth - With of each tile * @param tileHeight - Height of each tile * @param vflip - Set to <code>true</code> to file the whole image vertically * @param color - Color used to draw single shapes * @param numLevels - Number of zoom levels to plot * @throws IOException */ private static <S extends Shape> RunningJob plotMapReduce(Path inFile, Path outFile, OperationsParams params) throws IOException { Color color = params.getColor("color", Color.BLACK); String hdfDataset = (String) params.get("dataset"); Shape shape = hdfDataset != null ? new NASARectangle() : params.getShape("shape"); Shape plotRange = params.getShape("rect"); boolean background = params.is("background"); JobConf job = new JobConf(params, PyramidPlot.class); job.setJobName("PlotPyramid"); String partition = job.get("partition", "space").toLowerCase(); if (partition.equals("space")) { job.setMapperClass(SpacePartitionMap.class); job.setReducerClass(SpacePartitionReduce.class); job.setMapOutputKeyClass(TileIndex.class); job.setMapOutputValueClass(shape.getClass()); job.setInputFormat(ShapeInputFormat.class); } else { job.setMapperClass(DataPartitionMap.class); job.setReducerClass(DataPartitionReduce.class); job.setMapOutputKeyClass(TileIndex.class); job.setMapOutputValueClass(ImageWritable.class); job.setInputFormat(ShapeArrayInputFormat.class); } job.setInt("color", color.getRGB()); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); if (shape instanceof Point && job.getBoolean("sample", false)) { // Enable adaptive sampling int imageWidthRoot = job.getInt("tilewidth", 256); int imageHeightRoot = job.getInt("tileheight", 256); long recordCount = FileMBR.fileMBR(inFile, params).recordCount; float sampleRatio = params.getFloat(GeometricPlot.AdaptiveSampleFactor, 1.0f) * imageWidthRoot * imageHeightRoot / recordCount; job.setFloat(GeometricPlot.AdaptiveSampleRatio, sampleRatio); } Rectangle fileMBR; if (hdfDataset != null) { // Input is HDF job.set(HDFRecordReader.DatasetName, hdfDataset); job.setBoolean(HDFRecordReader.SkipFillValue, true); job.setClass("shape", NASARectangle.class, Shape.class); // Determine the range of values by opening one of the HDF files Aggregate.MinMax minMax = Aggregate.aggregate(new Path[] { inFile }, params); job.setInt(MinValue, minMax.minValue); job.setInt(MaxValue, minMax.maxValue); //fileMBR = new Rectangle(-180, -90, 180, 90); fileMBR = plotRange != null ? plotRange.getMBR() : new Rectangle(-180, -140, 180, 169); // job.setClass(HDFRecordReader.ProjectorClass, MercatorProjector.class, // GeoProjector.class); } else { fileMBR = FileMBR.fileMBR(inFile, params); } boolean keepAspectRatio = params.is("keep-ratio", true); if (keepAspectRatio) { // Expand input file to a rectangle for compatibility with the pyramid // structure if (fileMBR.getWidth() > fileMBR.getHeight()) { fileMBR.y1 -= (fileMBR.getWidth() - fileMBR.getHeight()) / 2; fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth(); } else { fileMBR.x1 -= (fileMBR.getHeight() - fileMBR.getWidth() / 2); fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight(); } } SpatialSite.setRectangle(job, InputMBR, fileMBR); // Set input and output ShapeInputFormat.addInputPath(job, inFile); if (plotRange != null) { job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class); } job.setOutputFormat(PyramidOutputFormat.class); TextOutputFormat.setOutputPath(job, outFile); job.setOutputCommitter(PlotPyramidOutputCommitter.class); if (background) { JobClient jc = new JobClient(job); return lastSubmittedJob = jc.submitJob(job); } else { return lastSubmittedJob = JobClient.runJob(job); } }
From source file:edu.umn.cs.spatialHadoop.operations.RecordCount.java
License:Open Source License
/** * Counts the exact number of lines in a file by issuing a MapReduce job * that does the thing//from w w w.j a v a 2 s .c o m * @param fs * @param inFile * @return * @throws IOException * @throws InterruptedException */ public static long recordCountMapReduce(FileSystem fs, Path inFile) throws IOException, InterruptedException { JobConf job = new JobConf(RecordCount.class); Path outputPath = new Path(inFile.toUri().getPath() + ".linecount"); FileSystem outFs = outputPath.getFileSystem(job); outFs.delete(outputPath, true); job.setJobName("LineCount"); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Reduce.class); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(1); job.setInputFormat(ShapeLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFile); TextOutputFormat.setOutputPath(job, outputPath); // Submit the job JobClient.runJob(job); // Read job result if (OperationsParams.isLocal(job, inFile)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); // Use multithreading too job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); } long lineCount = 0; FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus fileStatus : results) { if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) { LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath())); Text text = new Text(); if (lineReader.readLine(text) > 0) { lineCount = Long.parseLong(text.toString()); } lineReader.close(); } } outFs.delete(outputPath, true); return lineCount; }
From source file:edu.umn.cs.spatialHadoop.operations.SJMR.java
License:Open Source License
public static <S extends Shape> long sjmr(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, SJMR.class); LOG.info("SJMR journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from w w w . j a v a 2 s.c om outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("SJMR"); job.setMapperClass(SJMRMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(SJMRReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Touches.java
License:Open Source License
public static <S extends Shape> long touches(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Touches.class); LOG.info("Touches journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {// w ww. j av a2s .c om outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Touches"); job.setMapperClass(TouchesMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(TouchesReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Within.java
License:Open Source License
public static <S extends Shape> long within(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Within.class); LOG.info("Within journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {// w ww. j a v a 2 s . com outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Within"); job.setMapperClass(WithinMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(WithinReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/** * // w w w.j a va 2s. co m * @param inputPath * @param outputPath * @param params * @return * @throws IOException * @throws Exception * @throws InterruptedException */ private static long stJoin(Path inputPath, Path outputPath, OperationsParams params) throws IOException, Exception, InterruptedException { JobConf conf = new JobConf(new Configuration(), STJoin.class); FileSystem outfs = outputPath.getFileSystem(conf); outfs.delete(outputPath, true); conf.setJobName("STJoin"); // pass params to the join map-reduce conf.set("timedistance", params.get("timedistance")); conf.set("spacedistance", params.get("spacedistance")); // conf.setMapOutputKeyClass(LongWritable.class); // conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); // Mapper settings conf.setMapperClass(STJoinMap.class); // conf.setReducerClass(STJoinReduce.class); // conf.setCombinerClass(STJoinReduce.class); conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setNumReduceTasks(0); JobClient.runJob(conf).waitForCompletion(); outfs = inputPath.getFileSystem(conf); outfs.delete(inputPath); return 0; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_large"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }//from w w w . j a va2s. c o m // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue " + "FROM UserVisits GROUP BY sourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }//w w w . ja v a2 s . c o m // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT SUBSTRING(sourceIP, 1, 7) AS subSourceIP, SUM(adRevenue) AS sumAdRevenue FROM UserVisits GROUP BY subSourceIP;"); return conf; }