List of usage examples for org.apache.hadoop.mapred JobConf setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
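Before the project-specific examples below, here is a minimal, self-contained sketch of the typical pattern: the driver stores an int-valued setting in the job configuration with setInt, and tasks can read it back with getInt. The class name SetIntExample, the property key "example.sample.threshold", and the argument handling are illustrative placeholders, not taken from the examples that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetIntExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical driver: configure a job and stash an int-valued property in it
        JobConf job = new JobConf(SetIntExample.class);
        job.setJobName("SetIntExample");

        // Store the value under a custom key; mappers/reducers that receive this
        // configuration can recover it with job.getInt("example.sample.threshold", <default>)
        job.setInt("example.sample.threshold", 42);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }
}

Inside a Mapper's configure(JobConf job) method, the same value would be read back with job.getInt("example.sample.threshold", 0).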
From source file:edu.umn.cs.spatialHadoop.nasa.DistributedAggregateSpatioTemporalIndexer.java
License:Open Source License
/**
 * Build a bunch of AggregateQuadTrees using a Map-Reduce job
 * @param inputPathsDictionaryPath
 * @param params
 * @throws IOException
 */
public static void aggregateQuadTreeMapReduce(Path inputPathsDictionaryPath, OperationsParams params)
        throws IOException {
    // Configure a map-reduce job
    JobConf job = new JobConf(params, DistributedAggregateSpatioTemporalIndexer.class);
    Path outputPath;
    String outputPathPrefix = "aggQuadTree_";
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(outputPathPrefix + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("AggregateQuadTree");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(AggregateQuadTreeMaper.class);
    job.set(HDFSIndexPath, hdfsIndexPath.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    TextInputFormat.setInputPaths(job, inputPathsDictionaryPath);
    TextOutputFormat.setOutputPath(job, outputPath);

    if (job.getBoolean("local", false)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, 16);
    }
    job.setNumReduceTasks(0);

    // Submit the job
    JobClient.runJob(job);
    outFs.delete(outputPath, true);
}
From source file:edu.umn.cs.spatialHadoop.operations.DistributedJoin.java
License:Open Source License
/**
 * Performs a redistribute join between the given files using the
 * redistribute join algorithm. Currently, we only support a pair of files.
 * @param inFiles
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 */
public static <S extends Shape> long joinStep(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException {
    long t1 = System.currentTimeMillis();

    JobConf job = new JobConf(params, DistributedJoin.class);

    FileSystem fs[] = new FileSystem[inFiles.length];
    for (int i_file = 0; i_file < inFiles.length; i_file++)
        fs[i_file] = inFiles[i_file].getFileSystem(job);

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inFiles[0].getName() + ".dj_" + (int) (Math.random() * 1000000));
        } while (fs[0].exists(outputPath));
    }

    job.setJobName("DistributedJoin");
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    GlobalIndex<Partition> gindex1 = SpatialSite.getGlobalIndex(fs[0], inFiles[0]);
    GlobalIndex<Partition> gindex2 = SpatialSite.getGlobalIndex(fs[1], inFiles[1]);

    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    LOG.info("Joining " + inFiles[0] + " X " + inFiles[1]);

    if (SpatialSite.isRTree(fs[0], inFiles[0]) && SpatialSite.isRTree(fs[1], inFiles[1])) {
        job.setInputFormat(DJInputFormatRTree.class);
    } else {
        if (isOneShotReadMode) {
            // Ensure all objects are read in one shot
            job.setInt(SpatialSite.MaxBytesInOneRead, -1);
            job.setInt(SpatialSite.MaxShapesInOneRead, -1);
        } else {
            job.setInt(SpatialSite.MaxBytesInOneRead, maxBytesInOneRead);
            job.setInt(SpatialSite.MaxShapesInOneRead, maxShapesInOneRead);
        }
        job.setInputFormat(DJInputFormatArray.class);
    }

    // Set input paths and map function
    if (inFiles[0].equals(inFiles[1])) {
        // Self join
        job.setInputFormat(ShapeArrayInputFormat.class);
        // Remove the spatial filter to ensure all partitions are loaded
        FileInputFormat.setInputPaths(job, inFiles[0]);
        if (gindex1 != null && gindex1.isReplicated())
            job.setMapperClass(RedistributeJoinMap.class);
        else
            job.setMapperClass(RedistributeJoinMapNoDupAvoidance.class);
    } else {
        // Binary version of spatial join (two different input files)
        job.setClass(SpatialSite.FilterClass, SpatialJoinFilter.class, BlockFilter.class);
        FileInputFormat.setInputPaths(job, inFiles);
        if ((gindex1 != null && gindex1.isReplicated()) || (gindex2 != null && gindex2.isReplicated())) {
            // Need the map function with duplicate avoidance step
            job.setMapperClass(RedistributeJoinMap.class);
        } else {
            // No replication in both indexes, use map function with no dup avoidance
            job.setMapperClass(RedistributeJoinMapNoDupAvoidance.class);
        }
    }

    Shape shape = params.getShape("shape");
    job.setMapOutputKeyClass(shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(0); // No reduce needed for this task

    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (!params.getBoolean("background", false)) {
        LOG.info("Submit job in sync mode");
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // Output number of running map tasks
        Counter mapTaskCountCounter = counters.findCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
        System.out.println("Number of map tasks " + mapTaskCountCounter.getValue());

        // Delete output directory if not explicitly set by user
        if (userOutputPath == null)
            fs[0].delete(outputPath, true);

        long t2 = System.currentTimeMillis();
        System.out.println("Join time " + (t2 - t1) + " millis");

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        LOG.info("Submit job in async mode");
        lastRunningJob = jc.submitJob(job);
        LOG.info("Job " + lastRunningJob + " submitted successfully");
        return -1;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.FileMBR.java
License:Open Source License
/**
 * Computes the MBR of the input files using an aggregate MapReduce job.
 * @param inFiles - Paths to the input files
 * @param params - Additional operation parameters
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
private static <S extends Shape> Partition fileMBRMapReduce(Path[] inFiles, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(inFiles[0].getName() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Partition.class);

    job.setMapperClass(FileMBRMapper.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Combine.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inFiles);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
    }

    if (params.getBoolean("background", false)) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter outputSizeCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        sizeOfLastProcessedFile = outputSizeCounter.getCounter();

        FileStatus[] outFiles = outFs.listStatus(outputPath, SpatialSite.NonHiddenFileFilter);
        Partition mbr = new Partition();
        mbr.set(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
        OperationsParams localMBRParams = new OperationsParams(params);
        localMBRParams.setBoolean("local", true); // Enforce local execution
        localMBRParams.setClass("shape", Partition.class, Shape.class);
        for (FileStatus outFile : outFiles) {
            if (outFile.isDir())
                continue;
            ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(localMBRParams,
                    new FileSplit(outFile.getPath(), 0, outFile.getLen(), new String[0]));
            Rectangle key = reader.createKey();
            Partition p = reader.createValue();
            while (reader.next(key, p)) {
                mbr.expand(p);
            }
            reader.close();
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Indexer.class);
    job.setJobName("Indexer");

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inPath, params);
    OperationsParams.setShape(job, "mbr", inputMBR);

    // Set input and output
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeIterInputFormat.setInputPaths(job, inPath);
    job.setOutputFormat(IndexOutputFormat.class);
    GridOutputFormat.setOutputPath(job, outPath);

    // Set the correct partitioner according to index type
    String index = job.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    Partitioner partitioner = createPartitioner(inPath, outPath, job, index);
    Partitioner.setPartitioner(job, partitioner);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = params.getShape("shape");
    job.setMapperClass(IndexMethods.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(IndexMethods.class);
    job.setOutputCommitter(IndexerOutputCommitter.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    // Use multithreading in case the job is running locally
    job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } else {
        // Run and block until it is finished
        return JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Rectangle.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.x1, fileMbr.y1, fileMbr.x2, fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.PlotPyramid.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int tileWidth,
        int tileHeight, int numLevels) throws IOException {
    JobConf job = new JobConf(PlotPyramid.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputKeyClass(TileIndex.class);
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMBR = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);

    // Expand input file to a rectangle for compatibility with the pyramid structure
    if (fileMBR.getWidth() > fileMBR.getHeight()) {
        fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
    } else {
        fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
    }
    SpatialSite.setRectangle(job, InputMBR, fileMBR);
    job.setInt(TileWidth, tileWidth);
    job.setInt(TileHeight, tileHeight);
    job.setInt(NumLevels, numLevels);

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.PyramidPlot.java
License:Apache License
/**
 * Plot a file to a set of images in different zoom levels using a MapReduce program.
 * @param <S> type of shapes stored in file
 * @param inFile - Path to the input file(s)
 * @param outFile - Path to the output file (image)
 * @param shape - A sample object to be used for parsing input file
 * @param tileWidth - Width of each tile
 * @param tileHeight - Height of each tile
 * @param vflip - Set to <code>true</code> to flip the whole image vertically
 * @param color - Color used to draw single shapes
 * @param numLevels - Number of zoom levels to plot
 * @throws IOException
 */
private static <S extends Shape> RunningJob plotMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException {
    Color color = params.getColor("color", Color.BLACK);
    String hdfDataset = (String) params.get("dataset");
    Shape shape = hdfDataset != null ? new NASARectangle() : params.getShape("shape");
    Shape plotRange = params.getShape("rect");
    boolean background = params.is("background");

    JobConf job = new JobConf(params, PyramidPlot.class);
    job.setJobName("PlotPyramid");

    String partition = job.get("partition", "space").toLowerCase();
    if (partition.equals("space")) {
        job.setMapperClass(SpacePartitionMap.class);
        job.setReducerClass(SpacePartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setInputFormat(ShapeInputFormat.class);
    } else {
        job.setMapperClass(DataPartitionMap.class);
        job.setReducerClass(DataPartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(ImageWritable.class);
        job.setInputFormat(ShapeArrayInputFormat.class);
    }

    job.setInt("color", color.getRGB());
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    if (shape instanceof Point && job.getBoolean("sample", false)) {
        // Enable adaptive sampling
        int imageWidthRoot = job.getInt("tilewidth", 256);
        int imageHeightRoot = job.getInt("tileheight", 256);
        long recordCount = FileMBR.fileMBR(inFile, params).recordCount;
        float sampleRatio = params.getFloat(GeometricPlot.AdaptiveSampleFactor, 1.0f) * imageWidthRoot
                * imageHeightRoot / recordCount;
        job.setFloat(GeometricPlot.AdaptiveSampleRatio, sampleRatio);
    }

    Rectangle fileMBR;
    if (hdfDataset != null) {
        // Input is HDF
        job.set(HDFRecordReader.DatasetName, hdfDataset);
        job.setBoolean(HDFRecordReader.SkipFillValue, true);
        job.setClass("shape", NASARectangle.class, Shape.class);
        // Determine the range of values by opening one of the HDF files
        Aggregate.MinMax minMax = Aggregate.aggregate(new Path[] { inFile }, params);
        job.setInt(MinValue, minMax.minValue);
        job.setInt(MaxValue, minMax.maxValue);
        //fileMBR = new Rectangle(-180, -90, 180, 90);
        fileMBR = plotRange != null ? plotRange.getMBR() : new Rectangle(-180, -140, 180, 169);
        //    job.setClass(HDFRecordReader.ProjectorClass, MercatorProjector.class,
        //        GeoProjector.class);
    } else {
        fileMBR = FileMBR.fileMBR(inFile, params);
    }

    boolean keepAspectRatio = params.is("keep-ratio", true);
    if (keepAspectRatio) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (fileMBR.getWidth() > fileMBR.getHeight()) {
            fileMBR.y1 -= (fileMBR.getWidth() - fileMBR.getHeight()) / 2;
            fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
        } else {
            fileMBR.x1 -= (fileMBR.getHeight() - fileMBR.getWidth() / 2);
            fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
        }
    }

    SpatialSite.setRectangle(job, InputMBR, fileMBR);

    // Set input and output
    ShapeInputFormat.addInputPath(job, inFile);
    if (plotRange != null) {
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    }

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);
    job.setOutputCommitter(PlotPyramidOutputCommitter.class);

    if (background) {
        JobClient jc = new JobClient(job);
        return lastSubmittedJob = jc.submitJob(job);
    } else {
        return lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.RecordCount.java
License:Open Source License
/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * @param fs
 * @param inFile
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static long recordCountMapReduce(FileSystem fs, Path inFile) throws IOException, InterruptedException {
    JobConf job = new JobConf(RecordCount.class);

    Path outputPath = new Path(inFile.toUri().getPath() + ".linecount");
    FileSystem outFs = outputPath.getFileSystem(job);
    outFs.delete(outputPath, true);

    job.setJobName("LineCount");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(1);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    // Read job result
    if (OperationsParams.isLocal(job, inFile)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
    }
    long lineCount = 0;
    FileStatus[] results = outFs.listStatus(outputPath);
    for (FileStatus fileStatus : results) {
        if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
            LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
            Text text = new Text();
            if (lineReader.readLine(text) > 0) {
                lineCount = Long.parseLong(text.toString());
            }
            lineReader.close();
        }
    }

    outFs.delete(outputPath, true);

    return lineCount;
}
From source file:edu.umn.cs.spatialHadoop.operations.Shuffle.java
License:Open Source License
/**
 * Randomly shuffles the lines of the input file by issuing a MapReduce job
 * @param infile
 * @param outfile
 * @param params
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(Shuffle.class);
    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setInt(NumOfPartitions, Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}