List of usage examples for org.apache.hadoop.mapreduce.Job.submit()
public void submit() throws IOException, InterruptedException, ClassNotFoundException
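In the examples below, submit() starts the job and returns immediately, while waitForCompletion() blocks until the job finishes; most of the SpatialHadoop examples choose between the two based on a "background" flag. For orientation, here is a minimal, self-contained sketch of the asynchronous pattern (the class name, job name, and paths are illustrative placeholders, not taken from any of the source files below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "submit-example");
        job.setJarByClass(SubmitExample.class);
        // No mapper/reducer set: the identity Mapper is used, and
        // zero reduce tasks means map output is written directly.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // submit() returns immediately, unlike waitForCompletion(true).
        job.submit();
        System.out.println("Submitted job " + job.getJobID());
    }
}

Because submit() does not block, the caller is responsible for checking the job's fate later, for example via isComplete() and isSuccessful(); a sketch of such a monitor follows the last example on this page.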
From source file: edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
License: BSD License

/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    // The original source declared this option with the duplicate long name
    // "aws-id"; "aws-key" fixes that collision.
    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b'"
                    + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();

    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }

    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg",
                datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file: edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License: Open Source License

/**
 * Run the DT algorithm in MapReduce
 * @param inPaths
 * @param outPath
 * @param params
 * @return
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static Job delaunayMapReduce(Path[] inPaths, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Delaunay Triangulation");
    job.setJarByClass(DelaunayTriangulation.class);

    // Set map and reduce
    job.setMapperClass(DelaunayMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Triangulation.class);
    job.setReducerClass(DelaunayReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    job.setOutputFormatClass(DelaunayTriangulationOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    // Set column boundaries to define the boundaries of each reducer
    SpatialSite.splitReduceSpace(job, inPaths, params);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(params.getBoolean("verbose", false));
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.indexing.Indexer.java
License: Open Source License

private static Job indexMapReduce(Path inPath, Path outPath, OperationsParams paramss)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(paramss, "Indexer");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Indexer.class);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    // Set the correct partitioner according to index type
    String index = conf.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    setLocalIndexer(conf, index);
    Partitioner partitioner = createPartitioner(inPath, outPath, conf, index);
    Partitioner.setPartitioner(conf, partitioner);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = OperationsParams.getShape(conf, "shape");
    job.setMapperClass(PartitionerMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(PartitionerReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    job.setOutputFormatClass(IndexOutputFormat.class);
    IndexOutputFormat.setOutputPath(job, outPath);

    // Set number of reduce tasks according to cluster status
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    job.setNumReduceTasks(Math.max(1,
            Math.min(partitioner.getPartitionCount(), (clusterStatus.getMaxReduceTasks() * 9) / 10)));

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (conf.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(conf.getBoolean("verbose", false));
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License: Open Source License

public static Job closestPairMapReduce(Path[] inPaths, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Closest Pair");
    job.setJarByClass(ClosestPair.class);
    Shape shape = params.getShape("shape");

    // Set map and reduce
    job.setMapperClass(ClosestPairMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(ClosestPairReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    job.setOutputFormatClass(ClosestPairOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    // Set column boundaries to define the boundaries of each reducer
    SpatialSite.splitReduceSpace(job, inPaths, params);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(params.getBoolean("verbose", false));
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.FarthestPair.java
License: Open Source License

public static Job farthestPairMapReduce(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "FarthestPair");
    job.setJarByClass(FarthestPair.class);
    job.setMapperClass(FarthestPairMap.class);
    job.setReducerClass(FarthestPairReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(PairDistance.class);
    job.setInputFormatClass(SpatialInputFormat3.class);
    // Add input file twice to treat it as a binary function
    SpatialInputFormat3.addInputPath(job, inFile);
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Calculate an initial lower bound on the farthest pair from the global index
    FileSystem fs = inFile.getFileSystem(params);
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inFile);
    if (gindex != null) {
        double tightLowerBound = 0;
        for (Partition p1 : gindex) {
            for (Partition p2 : gindex) {
                double lb = computeLB(p1, p2);
                if (lb > tightLowerBound)
                    tightLowerBound = lb;
            }
        }
        job.getConfiguration().setFloat(FarthestPairLowerBound, (float) tightLowerBound);
    }

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License: Open Source License

public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Use the built-in range filter of the input format
    params.set(SpatialInputFormat3.InputQueryRange, params.get("rect"));
    // Use multithreading in case it is running locally
    params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    Job job = new Job(params, "RangeQuery");
    job.setJarByClass(RangeQuery.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFile);

    job.setMapperClass(RangeQueryMap.class);

    if (params.getBoolean("output", true) && outFile != null) {
        job.setOutputFormatClass(TextOutputFormat3.class);
        TextOutputFormat3.setOutputPath(job, outFile);
    } else {
        // Skip writing the output for the sake of debugging
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
    } else {
        job.submit();
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.Sampler2.java
License: Open Source License

public static Job sampleMapReduce(Path[] files, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Sampler2");
    job.setJarByClass(Sampler2.class);

    // Set input and output
    job.setInputFormatClass(SampleInputFormat.class);
    SampleInputFormat.setInputPaths(job, files);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0); // No reducer needed

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.UltimateUnion.java
License: Open Source License

private static Job ultimateUnionMapReduce(Path input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "UltimateUnion");
    job.setJarByClass(UltimateUnion.class);

    // Set map and reduce
    job.setMapperClass(UltimateUnionMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(OGCJTSShape.class);
    job.setNumReduceTasks(0);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.addInputPath(job, input);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.operations.Union.java
License: Open Source License

private static Job unionMapReduce(Path input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "BasicUnion");
    job.setJarByClass(Union.class);

    // Set map and reduce
    job.setMapperClass(UnionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(OGCJTSShape.class);
    job.setReducerClass(UnionReduce.class);
    SpatialSite.splitReduceSpace(job, new Path[] { input }, params);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.addInputPath(job, input);
    job.setOutputFormatClass(UnionOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}
From source file: edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java
License: Open Source License

private static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }

    Job job = new Job(params, "MultilevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);
    // Set input file MBR
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inFiles, params);

    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }
    OperationsParams.setShape(conf, InputMBR, inputMBR);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        job.setOutputFormatClass(PyramidOutputFormat2.class);
        PyramidOutputFormat2.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper, reducer and committer
    String partitionTechnique = params.get("partition", "flat");
    if (partitionTechnique.equalsIgnoreCase("flat")) {
        // Use flat partitioning
        job.setMapperClass(FlatPartitionMap.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        job.setReducerClass(FlatPartitionReduce.class);
    } else if (partitionTechnique.equalsIgnoreCase("pyramid")) {
        // Use pyramid partitioning
        Shape shape = params.getShape("shape");
        job.setMapperClass(PyramidPartitionMap.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(PyramidPartitionReduce.class);
    } else {
        throw new RuntimeException("Unknown partitioning technique '" + partitionTechnique + "'");
    }
    // Set number of reducers
    job.setNumReduceTasks(
            Math.max(1, new JobClient(new JobConf()).getClusterStatus().getMaxReduceTasks() * 7 / 8));
    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        job.submit();
    } else {
        job.waitForCompletion(false);
    }
    return job;
}
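A recurring pattern in the examples above is to return the Job right after submit() when a "background" flag is set, leaving completion handling to the caller. A minimal sketch of such a caller-side monitor is shown below; JobMonitor and awaitCompletion are hypothetical names, not part of Hadoop or SpatialHadoop:

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

public final class JobMonitor {
    private JobMonitor() {
    }

    /**
     * Polls a job that was started with submit() until it finishes.
     * Returns true if the job completed successfully.
     */
    public static boolean awaitCompletion(Job job, long pollMillis)
            throws IOException, InterruptedException {
        while (!job.isComplete()) { // isComplete() queries the cluster for current status
            Thread.sleep(pollMillis);
        }
        return job.isSuccessful();
    }
}

For example, a caller could pass the Job returned by one of the background-submitting methods above to awaitCompletion(job, 5000) to block with a 5-second polling interval instead of calling waitForCompletion() up front.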