com.ricemap.spateDB.util.RandomSpatialGenerator.java Source code

Java tutorial

Introduction

Here is the source code for com.ricemap.spateDB.util.RandomSpatialGenerator.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */
package com.ricemap.spateDB.util;

import java.awt.Color;
import java.awt.Rectangle;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

import com.ricemap.spateDB.core.CellInfo;
import com.ricemap.spateDB.core.GridInfo;
import com.ricemap.spateDB.core.GridRecordWriter;
import com.ricemap.spateDB.core.Partition;
import com.ricemap.spateDB.core.ShapeRecordWriter;
import com.ricemap.spateDB.core.SpatialSite;
import com.ricemap.spateDB.mapred.GridOutputFormat;
import com.ricemap.spateDB.mapred.RandomInputFormat;
import com.ricemap.spateDB.mapred.RandomShapeGenerator;
import com.ricemap.spateDB.mapred.RandomShapeGenerator.DistributionType;
import com.ricemap.spateDB.operations.Plot;
import com.ricemap.spateDB.operations.Repartition;
import com.ricemap.spateDB.operations.Repartition.RepartitionReduce;
import com.ricemap.spateDB.shape.Prism;
import com.ricemap.spateDB.shape.Shape;

/**
 * Generates a random file of rectangles or points based on some user
 * parameters
 * @author tonyren, Ahmed Eldawy
 *
 */
public class RandomSpatialGenerator {

    public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
            String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
            throws IOException {
        JobConf job = new JobConf(RandomSpatialGenerator.class);

        job.setJobName("Generator");
        FileSystem outFs = file.getFileSystem(job);

        // Overwrite output file
        if (outFs.exists(file)) {
            if (overwrite)
                outFs.delete(file, true);
            else
                throw new RuntimeException(
                        "Output file '" + file + "' already exists and overwrite flag is not set");
        }

        // Set generation parameters in job
        job.setLong(RandomShapeGenerator.GenerationSize, size);
        SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
        if (seed != 0)
            job.setLong(RandomShapeGenerator.GenerationSeed, seed);
        if (rectsize != 0)
            job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
        if (type != null)
            job.set(RandomShapeGenerator.GenerationType, type.toString());

        ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
        // Set input format and map class
        job.setInputFormat(RandomInputFormat.class);
        job.setMapperClass(Repartition.RepartitionMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

        SpatialSite.setShapeClass(job, shape.getClass());

        if (blocksize != 0) {
            job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
        }

        CellInfo[] cells;
        if (sindex == null) {
            cells = new CellInfo[] { new CellInfo(1, mbr) };
        } else if (sindex.equals("grid")) {
            GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
            FileSystem fs = file.getFileSystem(job);
            if (blocksize == 0) {
                blocksize = fs.getDefaultBlockSize(file);
            }
            int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
            gridInfo.calculateCellDimensions(numOfCells);
            cells = gridInfo.getAllCells();
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }

        SpatialSite.setCells(job, cells);

        // Do not set a reduce function. Use the default identity reduce function
        if (cells.length == 1) {
            // All objects are in one partition. No need for a reduce phase
            job.setNumReduceTasks(0);
        } else {
            // More than one partition. Need a reduce phase to group shapes of the
            // same partition together
            job.setReducerClass(RepartitionReduce.class);
            job.setNumReduceTasks(
                    Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
        }

        // Set output path
        FileOutputFormat.setOutputPath(job, file);
        if (sindex == null || sindex.equals("grid")) {
            job.setOutputFormat(GridOutputFormat.class);
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }

        JobClient.runJob(job);

        // Concatenate all master files into one file
        FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains("_master");
            }
        });
        String ext = resultFiles[0].getPath().getName()
                .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
        Path masterPath = new Path(file, "_master" + ext);
        OutputStream destOut = outFs.create(masterPath);
        byte[] buffer = new byte[4096];
        for (FileStatus f : resultFiles) {
            InputStream in = outFs.open(f.getPath());
            int bytes_read;
            do {
                bytes_read = in.read(buffer);
                if (bytes_read > 0)
                    destOut.write(buffer, 0, bytes_read);
            } while (bytes_read > 0);
            in.close();
            outFs.delete(f.getPath(), false);
        }
        destOut.close();

        // Plot an image for the partitions used in file
        Path imagePath = new Path(file, "_partitions.png");
        int imageSize = (int) (Math.sqrt(cells.length) * 300);
        Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
                false);
    }

    /**
     * Generates random rectangles and write the result to a file.
     * @param outFS - The file system that contains the output file
     * @param outputFile - The file name to write to. If either outFS or
     *   outputFile is null, data is generated to the standard output
     * @param mbr - The whole MBR to generate in
     * @param shape 
     * @param totalSize - The total size of the generated file
     * @param blocksize 
     * @throws IOException 
     */
    public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr,
            DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException {
        FileSystem outFS = outFile.getFileSystem(new Configuration());
        if (blocksize == 0)
            blocksize = outFS.getDefaultBlockSize(outFile);

        // Calculate the dimensions of each partition based on gindex type
        CellInfo[] cells;
        if (sindex == null) {
            cells = new CellInfo[] { new CellInfo(1, mbr) };
        } else if (sindex.equals("grid")) {
            int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS,
                    outFile, blocksize);

            GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
            gridInfo.calculateCellDimensions(num_partitions);
            cells = gridInfo.getAllCells();
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }

        // Overwrite output file
        if (outFS.exists(outFile)) {
            if (overwrite)
                outFS.delete(outFile, true);
            else
                throw new RuntimeException(
                        "Output file '" + outFile + "' already exists and overwrite flag is not set");
        }
        outFS.mkdirs(outFile);

        ShapeRecordWriter<Shape> writer;
        if (sindex == null || sindex.equals("grid")) {
            writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false);
        } else {
            throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
        }

        if (rectSize == 0)
            rectSize = 100;
        long t1 = System.currentTimeMillis();

        RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize,
                seed);

        Prism key = generator.createKey();

        while (generator.next(key, shape)) {
            // Serialize it to text
            writer.write(NullWritable.get(), shape);
        }
        writer.close(null);
        long t2 = System.currentTimeMillis();

        System.out.println("Generation time: " + (t2 - t1) + " millis");
    }

    private static void printUsage() {
        System.out.println("Generates a file with random shapes");
        System.out.println("Parameters (* marks required parameters):");
        System.out
                .println("<output file> - Path to the file to generate. If omitted, file is generated to stdout.");
        System.out.println(
                "mbr:<x,y,w,h> - (*) The MBR of the generated data. Originated at (x,y) with dimensions (w,h)");
        System.out.println("shape:<point|(rectangle)|polygon> - Type of shapes in generated file");
        System.out.println("blocksize:<size> - Block size in the generated file");
        System.out.println("global:<grid|rtree> - Type of global index in generated file");
        System.out.println("local:<grid|rtree> - Type of local index in generated file");
        System.out.println("seed:<s> - Use a specific seed to generate the file");
        System.out.println("-overwrite - Overwrite output file without notice");
    }

    /**
     * @param args
     * @throws IOException 
     */
    public static void main(String[] args) throws IOException {
        CommandLineArguments cla = new CommandLineArguments(args);
        Prism mbr = cla.getPrism();
        if (mbr == null) {
            printUsage();
            throw new RuntimeException("Set MBR of the generated file using rect:<x,y,w,h>");
        }

        Path outputFile = cla.getPath();
        Shape stockShape = cla.getShape(false);
        long blocksize = cla.getBlockSize();
        int rectSize = cla.getRectSize();
        long seed = cla.getSeed();
        if (stockShape == null)
            stockShape = new Prism();

        long totalSize = cla.getSize();
        String sindex = cla.get("sindex");
        boolean overwrite = cla.isOverwrite();

        DistributionType type = DistributionType.UNIFORM;
        String strType = cla.get("type");
        if (strType != null) {
            strType = strType.toLowerCase();
            if (strType.startsWith("uni"))
                type = DistributionType.UNIFORM;
            else if (strType.startsWith("gaus"))
                type = DistributionType.GAUSSIAN;
            else if (strType.startsWith("cor"))
                type = DistributionType.CORRELATED;
            else if (strType.startsWith("anti"))
                type = DistributionType.ANTI_CORRELATED;
            else if (strType.startsWith("circle"))
                type = DistributionType.CIRCLE;
            else {
                System.err.println("Unknown distribution type: " + cla.get("type"));
                printUsage();
                return;
            }
        }

        if (outputFile != null) {
            System.out.print("Generating a file ");
            System.out.print("with sindex:" + sindex + " ");
            System.out.println("file of size: " + totalSize);
            System.out.println("To: " + outputFile);
            System.out.println("In the range: " + mbr);
        }

        if (totalSize < 100 * 1024 * 1024)
            generateFileLocal(outputFile, stockShape, sindex, totalSize, mbr, type, rectSize, seed, blocksize,
                    overwrite);
        else
            generateMapReduce(outputFile, mbr, totalSize, blocksize, stockShape, sindex, seed, rectSize, type,
                    overwrite);
        //    if (gindex == null && lindex == null)
        //      generateHeapFile(fs, outputFile, stockShape, totalSize, mbr, type, rectSize, seed, blocksize, overwrite);
        //    else
        //      generateGridFile(fs, outputFile, stockShape, totalSize, mbr, type, rectSize, seed, blocksize, gindex, lindex, overwrite);
    }

}