List of usage examples for org.apache.hadoop.mapred LocalJobRunner LOCAL_MAX_MAPS
String LOCAL_MAX_MAPS
To view the source code for org.apache.hadoop.mapred.LocalJobRunner.LOCAL_MAX_MAPS, click the Source link.
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static Job indexMapReduce(Path inPath, Path outPath, OperationsParams paramss) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(paramss, "Indexer"); Configuration conf = job.getConfiguration(); job.setJarByClass(Indexer.class); // Set input file MBR if not already set Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr"); if (inputMBR == null) { inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf)); OperationsParams.setShape(conf, "mbr", inputMBR); }//from w w w . j av a 2s .c o m // Set the correct partitioner according to index type String index = conf.get("sindex"); if (index == null) throw new RuntimeException("Index type is not set"); long t1 = System.currentTimeMillis(); setLocalIndexer(conf, index); Partitioner partitioner = createPartitioner(inPath, outPath, conf, index); Partitioner.setPartitioner(conf, partitioner); long t2 = System.currentTimeMillis(); System.out.println("Total time for space subdivision in millis: " + (t2 - t1)); // Set mapper and reducer Shape shape = OperationsParams.getShape(conf, "shape"); job.setMapperClass(PartitionerMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setReducerClass(PartitionerReduce.class); // Set input and output job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inPath); job.setOutputFormatClass(IndexOutputFormat.class); IndexOutputFormat.setOutputPath(job, outPath); // Set number of reduce tasks according to cluster status ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus(); job.setNumReduceTasks(Math.max(1, Math.min(partitioner.getPartitionCount(), (clusterStatus.getMaxReduceTasks() * 9) / 10))); // Use multithreading in case the job is running locally conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); // Start the job if (conf.getBoolean("background", false)) { // Run in background 
job.submit(); } else { job.waitForCompletion(conf.getBoolean("verbose", false)); } return job; }
From source file:edu.umn.cs.spatialHadoop.nasa.DistributedAggregateSpatioTemporalIndexer.java
License:Open Source License
/** * Build a bunch of AggregateQuadTrees using a Map-Reduce job * /*from w w w .j av a2 s . c om*/ * @param inputPathsDictionaryPath * @param params * @throws IOException */ public static void aggregateQuadTreeMapReduce(Path inputPathsDictionaryPath, OperationsParams params) throws IOException { // configure a map-reduce job JobConf job = new JobConf(params, DistributedAggregateSpatioTemporalIndexer.class); Path outputPath; String outputPathPrefix = "aggQuadTree_"; FileSystem outFs = FileSystem.get(job); do { outputPath = new Path(outputPathPrefix + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); job.setJobName("AggregateQuadTree"); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setMapperClass(AggregateQuadTreeMaper.class); job.set(HDFSIndexPath, hdfsIndexPath.toString()); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); TextInputFormat.setInputPaths(job, inputPathsDictionaryPath); TextOutputFormat.setOutputPath(job, outputPath); if (job.getBoolean("local", false)) { // Enforce local execution if explicitly set by user or for small // files job.set("mapred.job.tracker", "local"); // Use multithreading too job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, 16); } job.setNumReduceTasks(0); // Submit the job JobClient.runJob(job); outFs.delete(outputPath, true); }
From source file:edu.umn.cs.spatialHadoop.operations.FileMBR.java
License:Open Source License
/** * Computes the MBR of the input file using an aggregate MapReduce job. * //from w ww.j a va2 s. com * @param inFile - Path to input file * @param params - Additional operation parameters * @return * @throws IOException * @throws InterruptedException */ private static <S extends Shape> Partition fileMBRMapReduce(Path[] inFiles, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, FileMBR.class); Path outputPath; FileSystem outFs = FileSystem.get(job); do { outputPath = new Path(inFiles[0].getName() + ".mbr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); job.setJobName("FileMBR"); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Partition.class); job.setMapperClass(FileMBRMapper.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Combine.class); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setInputFormat(ShapeLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); ShapeInputFormat.setInputPaths(job, inFiles); TextOutputFormat.setOutputPath(job, outputPath); job.setOutputCommitter(MBROutputCommitter.class); // Submit the job if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); // Use multithreading too job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); } if (params.getBoolean("background", false)) { JobClient jc = new JobClient(job); lastSubmittedJob = jc.submitJob(job); return null; } else { lastSubmittedJob = JobClient.runJob(job); Counters counters = lastSubmittedJob.getCounters(); Counter outputSizeCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES); sizeOfLastProcessedFile = outputSizeCounter.getCounter(); FileStatus[] outFiles = outFs.listStatus(outputPath, SpatialSite.NonHiddenFileFilter); Partition mbr = new 
Partition(); mbr.set(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); OperationsParams localMBRParams = new OperationsParams(params); localMBRParams.setBoolean("local", true); // Enforce local execution localMBRParams.setClass("shape", Partition.class, Shape.class); for (FileStatus outFile : outFiles) { if (outFile.isDir()) continue; ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(localMBRParams, new FileSplit(outFile.getPath(), 0, outFile.getLen(), new String[0])); Rectangle key = reader.createKey(); Partition p = reader.createValue(); while (reader.next(key, p)) { mbr.expand(p); } reader.close(); } outFs.delete(outputPath, true); return mbr; } }
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Indexer.class); job.setJobName("Indexer"); // Set input file MBR if not already set Rectangle inputMBR = (Rectangle) params.getShape("mbr"); if (inputMBR == null) inputMBR = FileMBR.fileMBR(inPath, params); OperationsParams.setShape(job, "mbr", inputMBR); // Set input and output job.setInputFormat(ShapeIterInputFormat.class); ShapeIterInputFormat.setInputPaths(job, inPath); job.setOutputFormat(IndexOutputFormat.class); GridOutputFormat.setOutputPath(job, outPath); // Set the correct partitioner according to index type String index = job.get("sindex"); if (index == null) throw new RuntimeException("Index type is not set"); long t1 = System.currentTimeMillis(); Partitioner partitioner = createPartitioner(inPath, outPath, job, index); Partitioner.setPartitioner(job, partitioner); long t2 = System.currentTimeMillis(); System.out.println("Total time for space subdivision in millis: " + (t2 - t1)); // Set mapper and reducer Shape shape = params.getShape("shape"); job.setMapperClass(IndexMethods.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setReducerClass(IndexMethods.class); job.setOutputCommitter(IndexerOutputCommitter.class); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); // Use multithreading in case the job is running locally job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); // Start the job if (params.getBoolean("background", false)) { // Run in background JobClient jc = new JobClient(job); return jc.submitJob(job); } else {/*www. ja va 2s . c o m*/ // Run and block until it is finished return JobClient.runJob(job); } }
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License:Open Source License
public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params) throws IOException, ClassNotFoundException, InterruptedException { // Use the built-in range filter of the input format params.set(SpatialInputFormat3.InputQueryRange, params.get("rect")); // Use multithreading in case it is running locally params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); Job job = new Job(params, "RangeQuery"); job.setJarByClass(RangeQuery.class); job.setNumReduceTasks(0);// w w w . j a va2s . c o m job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inFile); job.setMapperClass(RangeQueryMap.class); if (params.getBoolean("output", true) && outFile != null) { job.setOutputFormatClass(TextOutputFormat3.class); TextOutputFormat3.setOutputPath(job, outFile); } else { // Skip writing the output for the sake of debugging job.setOutputFormatClass(NullOutputFormat.class); } // Submit the job if (!params.getBoolean("background", false)) { job.waitForCompletion(false); } else { job.submit(); } return job; }
From source file:edu.umn.cs.spatialHadoop.operations.RecordCount.java
License:Open Source License
/** * Counts the exact number of lines in a file by issuing a MapReduce job * that does the thing/*w w w.j a va2s . co m*/ * @param fs * @param inFile * @return * @throws IOException * @throws InterruptedException */ public static long recordCountMapReduce(FileSystem fs, Path inFile) throws IOException, InterruptedException { JobConf job = new JobConf(RecordCount.class); Path outputPath = new Path(inFile.toUri().getPath() + ".linecount"); FileSystem outFs = outputPath.getFileSystem(job); outFs.delete(outputPath, true); job.setJobName("LineCount"); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Reduce.class); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(1); job.setInputFormat(ShapeLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFile); TextOutputFormat.setOutputPath(job, outputPath); // Submit the job JobClient.runJob(job); // Read job result if (OperationsParams.isLocal(job, inFile)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); // Use multithreading too job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); } long lineCount = 0; FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus fileStatus : results) { if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) { LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath())); Text text = new Text(); if (lineReader.readLine(text) > 0) { lineCount = Long.parseLong(text.toString()); } lineReader.close(); } } outFs.delete(outputPath, true); return lineCount; }
From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java
License:Open Source License
private static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass, OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { Plotter plotter;/* www.j ava2s .c om*/ try { plotter = plotterClass.newInstance(); } catch (InstantiationException e) { throw new RuntimeException("Error creating rastierizer", e); } catch (IllegalAccessException e) { throw new RuntimeException("Error creating rastierizer", e); } Job job = new Job(params, "MultilevelPlot"); job.setJarByClass(SingleLevelPlot.class); // Set plotter Configuration conf = job.getConfiguration(); Plotter.setPlotter(conf, plotterClass); // Set input file MBR Rectangle inputMBR = (Rectangle) params.getShape("mbr"); if (inputMBR == null) inputMBR = FileMBR.fileMBR(inFiles, params); // Adjust width and height if aspect ratio is to be kept if (params.getBoolean("keepratio", true)) { // Expand input file to a rectangle for compatibility with the pyramid // structure if (inputMBR.getWidth() > inputMBR.getHeight()) { inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2; inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth(); } else { inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2; inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight(); } } OperationsParams.setShape(conf, InputMBR, inputMBR); // Set input and output job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inFiles); if (conf.getBoolean("output", true)) { job.setOutputFormatClass(PyramidOutputFormat2.class); PyramidOutputFormat2.setOutputPath(job, outFile); } else { job.setOutputFormatClass(NullOutputFormat.class); } // Set mapper, reducer and committer String partitionTechnique = params.get("partition", "flat"); if (partitionTechnique.equalsIgnoreCase("flat")) { // Use flat partitioning job.setMapperClass(FlatPartitionMap.class); job.setMapOutputKeyClass(TileIndex.class); job.setMapOutputValueClass(plotter.getCanvasClass()); 
job.setReducerClass(FlatPartitionReduce.class); } else if (partitionTechnique.equalsIgnoreCase("pyramid")) { // Use pyramid partitioning Shape shape = params.getShape("shape"); job.setMapperClass(PyramidPartitionMap.class); job.setMapOutputKeyClass(TileIndex.class); job.setMapOutputValueClass(shape.getClass()); job.setReducerClass(PyramidPartitionReduce.class); } else { throw new RuntimeException("Unknown partitioning technique '" + partitionTechnique + "'"); } // Set number of reducers job.setNumReduceTasks( Math.max(1, new JobClient(new JobConf()).getClusterStatus().getMaxReduceTasks() * 7 / 8)); // Use multithreading in case the job is running locally conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); // Start the job if (params.getBoolean("background", false)) { job.submit(); } else { job.waitForCompletion(false); } return job; }
From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java
License:Open Source License
/**
 * Generates a single-level plot using a MapReduce job and returns the created job.
 *
 * @param inFiles      input files to plot
 * @param outFile      output path of the generated image
 * @param plotterClass plotter implementation used to rasterize shapes
 * @param params       operation parameters ("mbr", "rect", "width", "height",
 *                     "keepratio", "merge", "output", "partition", "background", "verbose")
 * @return the submitted (or completed) job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }
    Job job = new Job(params, "SingleLevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    job.setJobName("SingleLevelPlot");
    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);
    // Set input file MBR; fall back to the draw rectangle, then to a full scan
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    Rectangle drawRect = (Rectangle) params.getShape("rect");
    if (inputMBR == null)
        inputMBR = drawRect != null ? drawRect : FileMBR.fileMBR(inFiles, params);
    OperationsParams.setShape(conf, InputMBR, inputMBR);
    if (drawRect != null)
        // Push the draw rectangle down as the input format's query range
        OperationsParams.setShape(conf, SpatialInputFormat3.InputQueryRange, drawRect);

    // Adjust width and height if aspect ratio is to be kept
    int imageWidth = conf.getInt("width", 1000);
    int imageHeight = conf.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) imageWidth / imageHeight) {
            // Fix width and change height
            imageHeight = (int) (inputMBR.getHeight() * imageWidth / inputMBR.getWidth());
            // Make divisible by two for compatibility with ffmpeg
            if (imageHeight % 2 == 1)
                imageHeight--;
            conf.setInt("height", imageHeight);
        } else {
            // Fix height and change width
            imageWidth = (int) (inputMBR.getWidth() * imageHeight / inputMBR.getHeight());
            conf.setInt("width", imageWidth);
        }
    }

    boolean merge = conf.getBoolean("merge", true);
    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        if (merge) {
            // Merge partial canvases into one image via a custom committer
            job.setOutputFormatClass(CanvasOutputFormat.class);
            conf.setClass("mapred.output.committer.class", CanvasOutputFormat.ImageWriterOld.class,
                    org.apache.hadoop.mapred.OutputCommitter.class);
        } else {
            // Keep one image per partition
            job.setOutputFormatClass(ImageOutputFormat.class);
        }
        CanvasOutputFormat.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper and reducer based on the partitioning scheme
    String partition = conf.get("partition", "none");
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    if (partition.equalsIgnoreCase("none")) {
        LOG.info("Using no-partition plot");
        job.setMapperClass(NoPartitionPlotMap.class);
        job.setCombinerClass(NoPartitionPlotCombine.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        if (merge) {
            int numSplits = new SpatialInputFormat3().getSplits(job).size();
            job.setReducerClass(NoPartitionPlotReduce.class);
            // Set number of reduce tasks according to cluster status
            int maxReduce = Math.max(1, clusterStatus.getMaxReduceTasks() * 7 / 8);
            job.setNumReduceTasks(Math.max(1, Math.min(maxReduce, numSplits / maxReduce)));
        } else {
            // No merging: map-only job
            job.setNumReduceTasks(0);
        }
    } else {
        LOG.info("Using repartition plot");
        Partitioner partitioner;
        if (partition.equals("pixel")) {
            // Special case for pixel level partitioning as it depends on the
            // visualization parameters
            partitioner = new GridPartitioner(inputMBR, imageWidth, imageHeight);
        } else if (partition.equals("grid")) {
            // Size the grid from the total number of input blocks
            int numBlocks = 0;
            for (Path in : inFiles) {
                FileSystem fs = in.getFileSystem(params);
                long size = FileUtil.getPathSize(fs, in);
                long blockSize = fs.getDefaultBlockSize(in);
                numBlocks += Math.ceil(size / (double) blockSize);
            }
            int numPartitions = numBlocks * 1000;
            int gridSize = (int) Math.ceil(Math.sqrt(numPartitions));
            partitioner = new GridPartitioner(inputMBR, gridSize, gridSize);
        } else {
            // Use a standard partitioner as created by the indexer
            partitioner = Indexer.createPartitioner(inFiles, outFile, conf, partition);
        }
        Shape shape = params.getShape("shape");
        job.setMapperClass(RepartitionPlotMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(RepartitionPlotReduce.class);
        // Set number of reducers according to cluster size
        job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
        Partitioner.setPartitioner(conf, partitioner);
    }

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}
From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java
License:Open Source License
public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params) throws IOException, ClassNotFoundException, InterruptedException { // Use the built-in range filter of the input format params.set(SpatialInputFormat3.InputQueryRange, params.get("rect")); // Use multithreading in case it is running locally params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); Job job = new Job(params, "Traj-KNN-distance"); job.setJarByClass(RangeQuery.class); job.setNumReduceTasks(0);/*from ww w .j a v a 2 s . c om*/ job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inFile); job.setMapperClass(RangeQueryMap.class); if (params.getBoolean("output", true) && outFile != null) { job.setOutputFormatClass(TextOutputFormat3.class); TextOutputFormat3.setOutputPath(job, outFile); } else { // Skip writing the output for the sake of debugging job.setOutputFormatClass(NullOutputFormat.class); } // Submit the job if (!params.getBoolean("background", false)) { job.waitForCompletion(false); } else { job.submit(); } return job; }