List of usage examples for org.apache.hadoop.mapred JobConf setOutputCommitter
public void setOutputCommitter(Class<? extends OutputCommitter> theClass)
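All of the examples below follow the same pattern: build a JobConf, configure input and output, and register a project-specific OutputCommitter before submitting the job. As a minimal, self-contained sketch of that pattern (the committer name, placeholder paths, and post-commit hook body are illustrative assumptions, not code from any of the projects below):

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;

public class CommitterExample {

    // Hypothetical committer: keeps the default file-commit behavior and adds a
    // hook that runs once after the whole job has committed.
    public static class PostProcessCommitter extends FileOutputCommitter {
        @Override
        public void commitJob(JobContext context) throws IOException {
            super.commitJob(context); // promote task outputs to the final directory
            // Job-level post-processing of the output directory would go here.
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(CommitterExample.class);
        job.setJobName("committer-example");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Hadoop instantiates the committer by reflection, so it needs a
        // public no-argument constructor (the default one suffices here).
        job.setOutputCommitter(PostProcessCommitter.class);

        JobClient.runJob(job);
    }
}

With no mapper or reducer set, the job falls back to the identity map/reduce classes; the sketch is only meant to show where setOutputCommitter fits into job setup.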
From source file:com.ricemap.spateDB.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Prism.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Prism fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.t1, fileMbr.x1, fileMbr.y1, fileMbr.t2, fileMbr.x2,
                fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
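The committer registered above is what does the stitching: commitJob runs exactly once, after every reduce task has committed its partial image. The PlotOutputCommitter source is not shown on this page; the sketch below only illustrates that hook, and the StitchingCommitter name, the part-file scan, and the omitted image merging are assumptions rather than SpatialHadoop code.

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;

// Hypothetical sketch of a job-level "stitching" committer.
public class StitchingCommitter extends FileOutputCommitter {
    @Override
    public void commitJob(JobContext context) throws IOException {
        super.commitJob(context); // first move all task outputs into the final directory
        JobConf job = context.getJobConf();
        Path outDir = FileOutputFormat.getOutputPath(job);
        FileSystem fs = outDir.getFileSystem(job);
        // Each reducer left one partial result (part-*); combine them into one file.
        for (FileStatus file : fs.listStatus(outDir)) {
            if (file.getPath().getName().startsWith("part-")) {
                // Application-specific merging (e.g. drawing partial images onto
                // one canvas) would happen here; it is omitted in this sketch.
            }
        }
    }
}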
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile the input file to repartition
 * @param outPath the output path where the index is written
 * @param stockShape an instance of the shapes stored in the input file
 * @param blockSize the block size for the constructed index
 * @param cellInfos the list of cells to partition the file into
 * @param sindex the type of spatial index to build
 * @param overwrite whether to overwrite the output or not
 * @param columnar whether to use columnar storage
 * @throws IOException
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite, boolean columnar) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    job.setBoolean(SpatialSite.PACK_CELLS, pack);
    job.setBoolean(SpatialSite.EXPAND_CELLS, expand);
    job.setStrings(SpatialSite.STORAGE_MODE, columnar ? "columnar" : "normal");

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    // Set default parameters for reading input file
    SpatialSite.setShapeClass(job, stockShape.getClass());

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    // Copy block size from source file if it's globally indexed
    FileSystem inFs = inFile.getFileSystem(job);
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
            LOG.info("Automatically setting block size to " + blockSize);
        }
    }
    if (blockSize != 0)
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blockSize);

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.DistributedCopy.java
License:Open Source License
private static void distributedCopy(Path inputPath, Path outputPath, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, DistributedCopy.class);
    job.setJobName("distcp3");

    // Set input
    job.setInputFormat(BlockInputFormat.class);
    BlockInputFormat.addInputPath(job, inputPath);

    // Set output
    job.setOutputFormat(BlockOutputFormat.class);
    BlockOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(BlockOutputCommitter.class);

    // Set number of mappers/reducers
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(0);

    // Run the job
    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.FileMBR.java
License:Open Source License
/**
 * Computes the MBR of the input files using an aggregate MapReduce job.
 *
 * @param inFiles - Paths to input files
 * @param params - Additional operation parameters
 * @return the MBR of the input files, or <code>null</code> if the job runs in the background
 * @throws IOException
 * @throws InterruptedException
 */
private static <S extends Shape> Partition fileMBRMapReduce(Path[] inFiles, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(inFiles[0].getName() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Partition.class);

    job.setMapperClass(FileMBRMapper.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Combine.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inFiles);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
    }
    if (params.getBoolean("background", false)) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter outputSizeCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        sizeOfLastProcessedFile = outputSizeCounter.getCounter();

        FileStatus[] outFiles = outFs.listStatus(outputPath, SpatialSite.NonHiddenFileFilter);
        Partition mbr = new Partition();
        mbr.set(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
        OperationsParams localMBRParams = new OperationsParams(params);
        localMBRParams.setBoolean("local", true); // Enforce local execution
        localMBRParams.setClass("shape", Partition.class, Shape.class);
        for (FileStatus outFile : outFiles) {
            if (outFile.isDir())
                continue;
            ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(localMBRParams,
                    new FileSplit(outFile.getPath(), 0, outFile.getLen(), new String[0]));
            Rectangle key = reader.createKey();
            Partition p = reader.createValue();
            while (reader.next(key, p)) {
                mbr.expand(p);
            }
            reader.close();
        }
        outFs.delete(outputPath, true);
        return mbr;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Indexer.class);
    job.setJobName("Indexer");

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inPath, params);
    OperationsParams.setShape(job, "mbr", inputMBR);

    // Set input and output
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeIterInputFormat.setInputPaths(job, inPath);
    job.setOutputFormat(IndexOutputFormat.class);
    GridOutputFormat.setOutputPath(job, outPath);

    // Set the correct partitioner according to index type
    String index = job.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    Partitioner partitioner = createPartitioner(inPath, outPath, job, index);
    Partitioner.setPartitioner(job, partitioner);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = params.getShape("shape");
    job.setMapperClass(IndexMethods.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(IndexMethods.class);
    job.setOutputCommitter(IndexerOutputCommitter.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    // Use multithreading in case the job is running locally
    job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } else {
        // Run and block until it is finished
        return JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Rectangle.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.x1, fileMbr.y1, fileMbr.x2, fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.PyramidPlot.java
License:Apache License
/**
 * Plot a file to a set of images in different zoom levels using a MapReduce program.
 * @param <S> type of shapes stored in file
 * @param inFile - Path to the input file(s)
 * @param outFile - Path to the output file (image)
 * @param shape - A sample object to be used for parsing input file
 * @param tileWidth - Width of each tile
 * @param tileHeight - Height of each tile
 * @param vflip - Set to <code>true</code> to flip the whole image vertically
 * @param color - Color used to draw single shapes
 * @param numLevels - Number of zoom levels to plot
 * @throws IOException
 */
private static <S extends Shape> RunningJob plotMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException {
    Color color = params.getColor("color", Color.BLACK);
    String hdfDataset = (String) params.get("dataset");
    Shape shape = hdfDataset != null ? new NASARectangle() : params.getShape("shape");
    Shape plotRange = params.getShape("rect");
    boolean background = params.is("background");

    JobConf job = new JobConf(params, PyramidPlot.class);
    job.setJobName("PlotPyramid");

    String partition = job.get("partition", "space").toLowerCase();
    if (partition.equals("space")) {
        job.setMapperClass(SpacePartitionMap.class);
        job.setReducerClass(SpacePartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setInputFormat(ShapeInputFormat.class);
    } else {
        job.setMapperClass(DataPartitionMap.class);
        job.setReducerClass(DataPartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(ImageWritable.class);
        job.setInputFormat(ShapeArrayInputFormat.class);
    }

    job.setInt("color", color.getRGB());
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    if (shape instanceof Point && job.getBoolean("sample", false)) {
        // Enable adaptive sampling
        int imageWidthRoot = job.getInt("tilewidth", 256);
        int imageHeightRoot = job.getInt("tileheight", 256);
        long recordCount = FileMBR.fileMBR(inFile, params).recordCount;
        float sampleRatio = params.getFloat(GeometricPlot.AdaptiveSampleFactor, 1.0f) * imageWidthRoot
                * imageHeightRoot / recordCount;
        job.setFloat(GeometricPlot.AdaptiveSampleRatio, sampleRatio);
    }

    Rectangle fileMBR;
    if (hdfDataset != null) {
        // Input is HDF
        job.set(HDFRecordReader.DatasetName, hdfDataset);
        job.setBoolean(HDFRecordReader.SkipFillValue, true);
        job.setClass("shape", NASARectangle.class, Shape.class);
        // Determine the range of values by opening one of the HDF files
        Aggregate.MinMax minMax = Aggregate.aggregate(new Path[] { inFile }, params);
        job.setInt(MinValue, minMax.minValue);
        job.setInt(MaxValue, minMax.maxValue);
        //fileMBR = new Rectangle(-180, -90, 180, 90);
        fileMBR = plotRange != null ? plotRange.getMBR() : new Rectangle(-180, -140, 180, 169);
        // job.setClass(HDFRecordReader.ProjectorClass, MercatorProjector.class,
        //     GeoProjector.class);
    } else {
        fileMBR = FileMBR.fileMBR(inFile, params);
    }

    boolean keepAspectRatio = params.is("keep-ratio", true);
    if (keepAspectRatio) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (fileMBR.getWidth() > fileMBR.getHeight()) {
            fileMBR.y1 -= (fileMBR.getWidth() - fileMBR.getHeight()) / 2;
            fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
        } else {
            fileMBR.x1 -= (fileMBR.getHeight() - fileMBR.getWidth()) / 2;
            fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
        }
    }

    SpatialSite.setRectangle(job, InputMBR, fileMBR);

    // Set input and output
    ShapeInputFormat.addInputPath(job, inFile);
    if (plotRange != null) {
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    }

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);
    job.setOutputCommitter(PlotPyramidOutputCommitter.class);

    if (background) {
        JobClient jc = new JobClient(job);
        return lastSubmittedJob = jc.submitJob(job);
    } else {
        return lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
public static void repartitionMapReduce(Path inFile, Path outPath, CellInfo[] cellInfos,
        OperationsParams params) throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outPath.getFileSystem(params);

    // Calculate number of partitions in output file
    // Copy blocksize from source file if it's globally indexed
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    if (cellInfos == null) {
        if (sindex.equals("grid")) {
            Rectangle input_mbr = FileMBR.fileMBR(inFile, params);
            long inFileSize = FileMBR.sizeOfLastProcessedFile;
            int num_partitions = calculateNumberOfPartitions(new Configuration(), inFileSize, outFs,
                    outPath, blockSize);
            GridInfo gridInfo = new GridInfo(input_mbr.x1, input_mbr.y1, input_mbr.x2, input_mbr.y2);
            gridInfo.calculateCellDimensions(num_partitions);
            cellInfos = gridInfo.getAllCells();
        } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
                || sindex.equals("str+")) {
            // Pack in rectangles using an RTree
            cellInfos = packInRectangles(inFile, outPath, params);
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }
    }

    JobConf job = new JobConf(params, Repartition.class);
    job.setJobName("Repartition");

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile The input raw file that needs to be indexed.
 * @param outPath The output path where the index will be written.
 * @param stockShape An instance of the shapes stored in the input file.
 * @param blockSize The block size for the constructed index.
 * @param cellInfos A predefined set of cells to use as a global index
 * @param sindex The type of index to build.
 * @param overwrite Whether to overwrite the output or not.
 * @throws IOException If an exception happens while preparing the job.
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outputPath.getFileSystem(params);
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cellInfos;
    if (sindex.equals("grid")) {
        Rectangle inputMBR = FileMBR.fileMBR(inputPaths[0], params);
        long inputFileSize = FileMBR.sizeOfLastProcessedFile;
        for (int i = 1; i < inputPaths.length; i++) {
            Rectangle currentInputMBR = FileMBR.fileMBR(inputPaths[i], params);
            inputMBR.expand(currentInputMBR);
            inputFileSize = inputFileSize + FileMBR.sizeOfLastProcessedFile;
        }

        int num_partitions = calculateNumberOfPartitions(new Configuration(), inputFileSize, outFs,
                outputPath, blockSize);
        GridInfo gridInfo = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cellInfos = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        // Pack in rectangles using an RTree
        cellInfos = packInRectangles(inputPaths, outputPath, params, null);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobConf job = new JobConf(params, RepartitionTemporal.class);
    job.setJobName("RepartitionTemporal");

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}