Example usage for org.apache.hadoop.mapred JobConf setLong

List of usage examples for org.apache.hadoop.mapred JobConf setLong

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setLong.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
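
The snippet below is a minimal, self-contained sketch of the call (the property name "example.max.bytes" is a made-up key used only for illustration; it is not one of the configuration keys used in the examples that follow):

import org.apache.hadoop.mapred.JobConf;

public class SetLongExample {
    public static void main(String[] args) {
        // Create a JobConf without loading the default resources.
        JobConf conf = new JobConf(false);

        // Store a long value under a property name.
        conf.setLong("example.max.bytes", 64L * 1024 * 1024);

        // Read it back; the second argument is the default returned when the
        // property is unset or cannot be parsed as a long.
        long maxBytes = conf.getLong("example.max.bytes", 0L);
        System.out.println("example.max.bytes = " + maxBytes);
    }
}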

Usage

From source file: com.m6d.filecrush.crush.CrushReducerTest.java

License: Apache License

@Test
public void missingOutputRegex() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output replacement: crush.1.regex.replacement".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file: com.m6d.filecrush.crush.CrushReducerTest.java

License: Apache License

@Test
public void missingInputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No input format: crush.1.input.format".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file: com.m6d.filecrush.crush.CrushReducerTest.java

License: Apache License

@Test
public void inputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", Object.class.getName());
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not a file input format: crush.1.input.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file: com.m6d.filecrush.crush.CrushReducerTest.java

License: Apache License

@Test
public void missingOutputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output format: crush.1.output.format".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file: com.m6d.filecrush.crush.CrushReducerTest.java

License: Apache License

@Test
public void outputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", Object.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file: com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License: Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        //skip file if it exceeds the file limit or the size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file: com.ricemap.spateDB.operations.Repartition.java

License: Apache License

/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile
 * @param outPath
 * @param cellInfos
 * @param pack
 * @param rtree
 * @param overwrite
 * @throws IOException
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite, boolean columnar) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    job.setBoolean(SpatialSite.PACK_CELLS, pack);
    job.setBoolean(SpatialSite.EXPAND_CELLS, expand);
    job.setStrings(SpatialSite.STORAGE_MODE, columnar ? "columnar" : "normal");

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    // Set default parameters for reading input file
    SpatialSite.setShapeClass(job, stockShape.getClass());

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    // Copy block size from source file if it's globally indexed
    FileSystem inFs = inFile.getFileSystem(job);

    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
            LOG.info("Automatically setting block size to " + blockSize);
        }
    }

    if (blockSize != 0)
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blockSize);
    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}

From source file: com.ricemap.spateDB.operations.Sampler.java

License: Apache License

/**
 * Sample a ratio of the file through a MapReduce job
 * @param fs
 * @param files
 * @param ratio
 * @param threshold - Maximum number of elements to be sampled
 * @param output
 * @param inObj
 * @return
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleMapReduceWithRatio(
        FileSystem fs, Path[] files, double ratio, long threshold, long seed, final ResultCollector<O> output,
        T inObj, O outObj) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(files[0].toUri().getPath() + ".sample_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("Sample");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setClass(InClass, inObj.getClass(), TextSerializable.class);
    job.setClass(OutClass, outObj.getClass(), TextSerializable.class);

    job.setMapperClass(Map.class);
    job.setLong(RANDOM_SEED, seed);
    job.setFloat(SAMPLE_RATIO, (float) ratio);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(0);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, files);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    RunningJob run_job = JobClient.runJob(job);

    Counters counters = run_job.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
    Sampler.sizeOfLastProcessedFile = inputBytesCounter.getValue();

    // Ratio of records to return from output based on the threshold
    // Note that any number greater than or equal to one will cause all
    // elements to be returned
    final double selectRatio = (double) threshold / resultCount;

    // Read job result
    int result_size = 0;
    if (output != null) {
        Text line = new Text();
        FileStatus[] results = outFs.listStatus(outputPath);

        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                try {
                    while (lineReader.readLine(line) > 0) {
                        if (Math.random() < selectRatio) {
                            if (output != null) {
                                outObj.fromText(line);
                                output.collect(outObj);
                            }
                            result_size++;
                        }
                    }
                } catch (RuntimeException e) {
                    e.printStackTrace();
                }
                lineReader.close();
            }
        }
    }

    outFs.delete(outputPath, true);

    return result_size;
}

From source file: com.ricemap.spateDB.util.RandomSpatialGenerator.java

License: Apache License

public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);

    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    SpatialSite.setShapeClass(job, shape.getClass());

    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}