List of usage examples for org.apache.hadoop.mapreduce.RecordReader.close()
public abstract void close() throws IOException;
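Every example below follows the same pattern: obtain a RecordReader from an InputFormat, initialize it, iterate with nextKeyValue(), and call close() once the split is consumed. Before the full examples, here is a minimal, self-contained sketch of that pattern. It is not taken from the sources below; the input path, the use of TextInputFormat, and the class name are illustrative placeholders. Closing the reader in a finally block guarantees file handles are released even if reading fails part-way.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderCloseSketch {
  public static void main(String[] args) throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    FileInputFormat.addInputPath(job, new Path("input.txt")); // hypothetical input path
    TextInputFormat inputFormat = new TextInputFormat();
    // A dummy task attempt context is enough when reading outside a real task
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    for (InputSplit split : inputFormat.getSplits(job)) {
      RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
      try {
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
          System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
        }
      } finally {
        reader.close(); // release file handles even if reading fails
      }
    }
  }
}

The SpatialHadoop examples below pass null for the TaskAttemptContext and instead call a reader-specific initialize(split, params) overload, but the close() at the end of each split loop plays the same role.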
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
    throws IOException, InterruptedException {
  // Read the inputs and store them in memory
  List<Shape>[] datasets = new List[inFiles.length];
  final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  for (int i = 0; i < inFiles.length; i++) {
    datasets[i] = new ArrayList<Shape>();
    FileSystem inFs = inFiles[i].getFileSystem(params);
    Job job = Job.getInstance(params);
    SpatialInputFormat3.addInputPath(job, inFiles[i]);
    for (InputSplit split : inputFormat.getSplits(job)) {
      FileSplit fsplit = (FileSplit) split;
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(fsplit, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          datasets[i].add(shape.clone());
        }
      }
      reader.close();
    }
  }
  // Apply the spatial join algorithm
  ResultCollector2<Shape, Shape> output = null;
  PrintStream out = null;
  if (outFile != null) {
    FileSystem outFS = outFile.getFileSystem(params);
    out = new PrintStream(outFS.create(outFile));
    final PrintStream outout = out;
    output = new ResultCollector2<Shape, Shape>() {
      @Override
      public void collect(Shape r, Shape s) {
        outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
      }
    };
  }
  long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);
  if (out != null)
    out.close();
  return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/**
 * Compute the Delaunay triangulation on the local machine.
 * @param inPaths
 * @param outPath
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      try {
        int numPoints = 0;
        for (int i = i1; i < i2; i++) {
          List<Point> points = new ArrayList<Point>();
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        }
        return numPoints;
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  if (params.getBoolean("dedup", true)) {
    float threshold = params.getFloat("threshold", 1E-5f);
    allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
  }
  LOG.info("Computing DT for " + allPoints.length + " points");
  GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
  LOG.info("DT computed");
  Rectangle mbr = FileMBR.fileMBR(inPaths, params);
  double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
  Rectangle bigMBR = mbr.buffer(buffer, buffer);
  if (outPath != null && params.getBoolean("output", true)) {
    LOG.info("Writing the output as a soup of triangles");
    Triangulation answer = dtAlgorithm.getFinalTriangulation();
    FileSystem outFS = outPath.getFileSystem(params);
    PrintStream out = new PrintStream(outFS.create(outPath));
    Text text = new Text2();
    byte[] tab = "\t".getBytes();
    for (Point[] triangle : answer.iterateTriangles()) {
      text.clear();
      triangle[0].toText(text);
      text.append(tab, 0, tab.length);
      triangle[1].toText(text);
      text.append(tab, 0, tab.length);
      triangle[2].toText(text);
      out.println(text);
    }
    out.close();
  }
  // dtAlgorithm.getFinalTriangulation().draw();
  // Triangulation finalPart = new Triangulation();
  // Triangulation nonfinalPart = new Triangulation();
  // dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart);
}
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
    throws IOException, InterruptedException {
  Job job = Job.getInstance(params);
  final Configuration conf = job.getConfiguration();
  final String sindex = conf.get("sindex");
  // Start reading input file
  List<InputSplit> splits = new ArrayList<InputSplit>();
  final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  FileSystem inFs = inPath.getFileSystem(conf);
  FileStatus inFStatus = inFs.getFileStatus(inPath);
  if (inFStatus != null && !inFStatus.isDir()) {
    // One file, retrieve it immediately.
    // This is useful if the input is a hidden file which is automatically
    // skipped by FileInputFormat. We need to plot a hidden file for the case
    // of plotting partition boundaries of a spatial index
    splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
  } else {
    SpatialInputFormat3.setInputPaths(job, inPath);
    for (InputSplit s : inputFormat.getSplits(job))
      splits.add(s);
  }
  // Copy splits to a final array to be used in parallel
  final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
  boolean replicate = PartitionerReplicate.get(sindex);
  // Set input file MBR if not already set
  Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
  if (inputMBR == null) {
    inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
    OperationsParams.setShape(conf, "mbr", inputMBR);
  }
  setLocalIndexer(conf, sindex);
  final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);
  final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
      outPath, conf);
  for (FileSplit fsplit : fsplits) {
    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
    if (reader instanceof SpatialRecordReader3) {
      ((SpatialRecordReader3) reader).initialize(fsplit, conf);
    } else if (reader instanceof RTreeRecordReader3) {
      ((RTreeRecordReader3) reader).initialize(fsplit, conf);
    } else if (reader instanceof HDFRecordReader) {
      ((HDFRecordReader) reader).initialize(fsplit, conf);
    } else {
      throw new RuntimeException("Unknown record reader");
    }
    final IntWritable partitionID = new IntWritable();
    while (reader.nextKeyValue()) {
      Iterable<Shape> shapes = reader.getCurrentValue();
      if (replicate) {
        for (final Shape s : shapes) {
          partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
            @Override
            public void collect(Integer id) {
              partitionID.set(id);
              try {
                recordWriter.write(partitionID, s);
              } catch (IOException e) {
                throw new RuntimeException(e);
              }
            }
          });
        }
      } else {
        for (final Shape s : shapes) {
          int pid = partitioner.overlapPartition(s);
          if (pid != -1) {
            partitionID.set(pid);
            recordWriter.write(partitionID, s);
          }
        }
      }
    }
    reader.close();
  }
  recordWriter.close(null);
  // Write the WKT formatted master file
  Path masterPath = new Path(outPath, "_master." + sindex);
  FileSystem outFs = outPath.getFileSystem(params);
  Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
  PrintStream wktOut = new PrintStream(outFs.create(wktPath));
  wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
  Text tempLine = new Text2();
  Partition tempPartition = new Partition();
  LineReader in = new LineReader(outFs.open(masterPath));
  while (in.readLine(tempLine) > 0) {
    tempPartition.fromText(tempLine);
    wktOut.println(tempPartition.toWKT());
  }
  in.close();
  wktOut.close();
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License:Open Source License
/**
 * Computes the closest pair using a local single-machine algorithm
 * (no MapReduce).
 * @param inPaths
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static Pair closestPairLocal(Path[] inPaths, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      int numPoints = 0;
      for (int i = i1; i < i2; i++) {
        try {
          List<Point> points = new ArrayList<Point>();
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        } catch (IOException e) {
          throw new RuntimeException("Error reading file", e);
        } catch (InterruptedException e) {
          throw new RuntimeException("Error reading file", e);
        }
      }
      return numPoints;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  LOG.info("Computing closest-pair for " + allPoints.length + " points");
  Pair closestPair = closestPairInMemory(allPoints, params.getInt(BruteForceThreshold, 100));
  return closestPair;
}
From source file:edu.umn.cs.spatialHadoop.operations.ConvexHull.java
License:Open Source License
/**
 * Computes the convex hull of an input file using a single machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void convexHullLocal(Path inFile, Path outFile, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inFile);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
    @Override
    public Point[] run(int i1, int i2) {
      try {
        List<Point> finalPoints = new ArrayList<Point>();
        final int MaxSize = 100000;
        Point[] points = new Point[MaxSize];
        int size = 0;
        for (int i = i1; i < i2; i++) {
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points[size++] = p.clone();
              if (size >= points.length) {
                // Perform convex hull and write the result to finalPoints
                Point[] chPoints = convexHullInMemory(points);
                for (Point skylinePoint : chPoints)
                  finalPoints.add(skylinePoint);
                size = 0; // reset
              }
            }
          }
          reader.close();
        }
        while (size-- > 0)
          finalPoints.add(points[size]);
        return finalPoints.toArray(new Point[finalPoints.size()]);
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (Point[] list : allLists)
    totalNumPoints += list.length;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (Point[] list : allLists) {
    System.arraycopy(list, 0, allPoints, pointer, list.length);
    pointer += list.length;
  }
  allLists.clear(); // To let the GC collect it
  Point[] ch = convexHullInMemory(allPoints);
  if (outFile != null) {
    if (params.getBoolean("overwrite", false)) {
      FileSystem outFs = outFile.getFileSystem(new Configuration());
      outFs.delete(outFile, true);
    }
    GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
    for (Point pt : ch) {
      out.write(NullWritable.get(), pt);
    }
    out.close(null);
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.FarthestPair.java
License:Open Source License
public static PairDistance farthestPairLocal(Path[] inPaths, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<org.apache.hadoop.mapreduce.InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      try {
        int numPoints = 0;
        for (int i = i1; i < i2; i++) {
          List<Point> points = new ArrayList<Point>();
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final org.apache.hadoop.mapreduce.RecordReader<Rectangle, Iterable<Point>> reader =
              inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        }
        return numPoints;
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  LOG.info("Computing farthest-pair for " + allPoints.length + " points");
  long t1 = System.currentTimeMillis();
  Point[] convexHull = ConvexHull.convexHullInMemory(allPoints);
  long t2 = System.currentTimeMillis();
  PairDistance farthestPair = rotatingCallipers(convexHull);
  long t3 = System.currentTimeMillis();
  System.out.println("Convex hull in " + (t2 - t1) / 1000.0 + " seconds "
      + "and rotating calipers in " + (t3 - t2) / 1000.0 + " seconds");
  return farthestPair;
}
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
    throws IOException, InterruptedException {
  int iterations = 0;
  FileSystem fs = inFile.getFileSystem(params);
  Point queryPoint = (Point) OperationsParams.getShape(params, "point");
  int k = params.getInt("k", 1);
  // Top-k objects are retained in this object
  PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);
  SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
  double kthDistance = Double.MAX_VALUE;
  if (gIndex != null) {
    // There is a global index, use it
    PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess =
        new PriorityQueue<KNN.ShapeWithDistance<Partition>>() {
          {
            initialize(gIndex.size());
          }

          @Override
          protected boolean lessThan(Object a, Object b) {
            return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
          }
        };
    for (Partition p : gIndex) {
      double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
      partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
    }
    while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {
      ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
      // Process this partition
      Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
      long length = fs.getFileStatus(partitionPath).getLen();
      FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(fsplit, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      iterations++;
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
          if (distance <= kthDistance)
            knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
        }
      }
      reader.close();
      if (knn.size() >= k)
        kthDistance = knn.top().distance;
    }
  } else {
    // No global index, have to scan the whole file
    Job job = new Job(params);
    SpatialInputFormat3.addInputPath(job, inFile);
    List<InputSplit> splits = inputFormat.getSplits(job);
    for (InputSplit split : splits) {
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(split, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(split, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(split, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      iterations++;
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
          knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
        }
      }
      reader.close();
    }
    if (knn.size() >= k)
      kthDistance = knn.top().distance;
  }
  long resultCount = knn.size();
  if (outPath != null && params.getBoolean("output", true)) {
    FileSystem outFS = outPath.getFileSystem(params);
    PrintStream ps = new PrintStream(outFS.create(outPath));
    Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
    resultsOrdered.setSize((int) resultCount);
    while (knn.size() > 0) {
      ShapeWithDistance<S> nextAnswer = knn.pop();
      resultsOrdered.set(knn.size(), nextAnswer);
    }
    Text text = new Text();
    for (ShapeWithDistance<S> answer : resultsOrdered) {
      text.clear();
      TextSerializerHelper.serializeDouble(answer.distance, text, ',');
      answer.shape.toText(text);
      ps.println(text);
    }
    ps.close();
  }
  TotalIterations.addAndGet(iterations);
  return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License:Open Source License
/**
 * Runs a range query on the local machine (no MapReduce) and the output is
 * streamed to the provided result collector. The query might run in parallel
 * which makes it necessary to design the result collector to accept parallel
 * calls to the method {@link ResultCollector#collect(Object)}.
 * You can use {@link ResultCollectorSynchronizer} to synchronize calls to
 * your ResultCollector if you cannot design yours to be thread safe.
 * @param inPath
 * @param queryRange
 * @param shape
 * @param params
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static <S extends Shape> long rangeQueryLocal(Path inPath, final Shape queryRange, final S shape,
    final OperationsParams params, final ResultCollector<S> output) throws IOException, InterruptedException {
  // Set MBR of query shape in job configuration to work with the spatial filter
  OperationsParams.setShape(params, SpatialInputFormat3.InputQueryRange, queryRange.getMBR());
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPath);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  // 2- Process splits in parallel
  List<Long> results = Parallel.forEach(splits.size(), new RunnableRange<Long>() {
    @Override
    public Long run(int i1, int i2) {
      long results = 0;
      for (int i = i1; i < i2; i++) {
        try {
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<S> shapes = reader.getCurrentValue();
            for (Shape s : shapes) {
              results++;
              if (output != null)
                output.collect((S) s);
            }
          }
          reader.close();
        } catch (IOException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        } catch (InterruptedException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        }
      }
      return results;
    }
  });
  long totalResultSize = 0;
  for (long result : results)
    totalResultSize += result;
  return totalResultSize;
}
From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java
License:Open Source License
/**
 * Computes the skyline of an input file using a single machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void skylineLocal(Path inFile, Path outFile, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inFile);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Direction dir = params.getDirection("dir", Direction.MaxMax);
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
    @Override
    public Point[] run(int i1, int i2) {
      try {
        List<Point> finalPoints = new ArrayList<Point>();
        final int MaxSize = 100000;
        Point[] points = new Point[MaxSize];
        int size = 0;
        for (int i = i1; i < i2; i++) {
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points[size++] = p.clone();
              if (size >= points.length) {
                // Perform skyline and write the result to finalPoints
                Point[] skylinePoints = skylineInMemory(points, dir);
                for (Point skylinePoint : skylinePoints)
                  finalPoints.add(skylinePoint);
                size = 0; // reset
              }
            }
          }
          reader.close();
        }
        while (size-- > 0)
          finalPoints.add(points[size]);
        return finalPoints.toArray(new Point[finalPoints.size()]);
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (Point[] list : allLists)
    totalNumPoints += list.length;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (Point[] list : allLists) {
    System.arraycopy(list, 0, allPoints, pointer, list.length);
    pointer += list.length;
  }
  allLists.clear(); // To let the GC collect it
  Point[] skyline = skylineInMemory(allPoints, dir);
  if (outFile != null) {
    if (params.getBoolean("overwrite", false)) {
      FileSystem outFs = outFile.getFileSystem(new Configuration());
      outFs.delete(outFile, true);
    }
    GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
    for (Point pt : skyline) {
      out.write(NullWritable.get(), pt);
    }
    out.close(null);
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.Union.java
License:Open Source License
private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPath);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
  // 2- Process splits in parallel
  final List<Float> progresses = new Vector<Float>();
  final IntWritable overallProgress = new IntWritable(0);
  List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() {
    @Override
    public List<Geometry> run(final int i1, final int i2) {
      final int pi;
      final IntWritable splitsProgress = new IntWritable();
      synchronized (progresses) {
        pi = progresses.size();
        progresses.add(0f);
      }
      final float progressRatio = (i2 - i1) / (float) splits.size();
      Progressable progress = new Progressable.NullProgressable() {
        @Override
        public void progress(float p) {
          progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio);
          float sum = 0;
          for (float f : progresses)
            sum += f;
          int newProgress = (int) (sum * 100);
          if (newProgress > overallProgress.get()) {
            overallProgress.set(newProgress);
            LOG.info("Local union progress " + newProgress + "%");
          }
        }
      };
      final List<Geometry> localUnion = new ArrayList<Geometry>();
      ResultCollector<Geometry> output = new ResultCollector<Geometry>() {
        @Override
        public void collect(Geometry r) {
          localUnion.add(r);
        }
      };
      final int MaxBatchSize = 100000;
      Geometry[] batch = new Geometry[MaxBatchSize];
      int batchSize = 0;
      for (int i = i1; i < i2; i++) {
        splitsProgress.set(i);
        try {
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<S> shapes = reader.getCurrentValue();
            for (S s : shapes) {
              if (s.geom == null)
                continue;
              batch[batchSize++] = s.geom;
              if (batchSize >= MaxBatchSize) {
                SpatialAlgorithms.multiUnion(batch, progress, output);
                batchSize = 0;
              }
            }
          }
          reader.close();
        } catch (IOException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        } catch (InterruptedException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        }
      }
      // Union all remaining geometries
      try {
        Geometry[] finalBatch = new Geometry[batchSize];
        System.arraycopy(batch, 0, finalBatch, 0, batchSize);
        SpatialAlgorithms.multiUnion(finalBatch, progress, output);
        return localUnion;
      } catch (IOException e) {
        // Should never happen as the context is passed as null
        throw new RuntimeException("Error in local union", e);
      }
    }
  }, parallelism);
  // Write result to output
  LOG.info("Merge the results of all splits");
  int totalNumGeometries = 0;
  for (List<Geometry> result : results)
    totalNumGeometries += result.size();
  List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries);
  for (List<Geometry> result : results)
    allInOne.addAll(result);
  final S outShape = (S) params.getShape("shape");
  final PrintStream out;
  if (outPath == null || !params.getBoolean("output", true)) {
    // Skip writing the output
    out = new PrintStream(new NullOutputStream());
  } else {
    FileSystem outFS = outPath.getFileSystem(params);
    out = new PrintStream(outFS.create(outPath));
  }
  SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]),
      new Progressable.NullProgressable() {
        int lastProgress = 0;

        public void progress(float p) {
          int newProgress = (int) (p * 100);
          if (newProgress > lastProgress) {
            LOG.info("Global union progress " + (lastProgress = newProgress) + "%");
          }
        }
      }, new ResultCollector<Geometry>() {
        Text line = new Text2();

        @Override
        public void collect(Geometry r) {
          outShape.geom = r;
          outShape.toText(line);
          out.println(line);
        }
      });
  out.close();
}