Example usage for org.apache.hadoop.mapreduce RecordReader close

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce RecordReader close() method.

Prototype

public abstract void close() throws IOException;

Document

Close the record reader.
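
A typical read cycle creates the reader, initializes it, iterates with nextKeyValue, and finally calls close() to release the underlying streams. The sketch below is a minimal, hypothetical illustration of that pattern (the inputFormat, split, and context variables are assumed to be in scope); because RecordReader implements Closeable, the close() call can also be handled by try-with-resources. The real-world examples below call close() directly after the read loop.

RecordReader<LongWritable, Text> reader = null;
try {
    // Create and initialize a reader for one input split
    reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    // Iterate over every record in the split
    while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();
        // ... process the record ...
    }
} finally {
    // Always close the reader, even if reading fails
    if (reader != null)
        reader.close();
}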

Usage

From source file: edu.umn.cs.spatialHadoop.core.RectangleNN.java

License: Open Source License

public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException {
    // Read the inputs and store them in memory
    List<Shape>[] datasets = new List[inFiles.length];
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (int i = 0; i < inFiles.length; i++) {
        datasets[i] = new ArrayList<Shape>();
        FileSystem inFs = inFiles[i].getFileSystem(params);
        Job job = Job.getInstance(params);
        SpatialInputFormat3.addInputPath(job, inFiles[i]);
        for (InputSplit split : inputFormat.getSplits(job)) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
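            // SpatialInputFormat3 may return different concrete reader types
            // depending on the file format; since createRecordReader was given
            // a null context, each type is initialized explicitly with params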
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    datasets[i].add(shape.clone());
                }
            }
            reader.close();
        }
    }

    // Apply the spatial join algorithm
    ResultCollector2<Shape, Shape> output = null;
    PrintStream out = null;
    if (outFile != null) {
        FileSystem outFS = outFile.getFileSystem(params);
        out = new PrintStream(outFS.create(outFile));
        final PrintStream outout = out;
        output = new ResultCollector2<Shape, Shape>() {
            @Override
            public void collect(Shape r, Shape s) {
                outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
            }
        };
    }
    long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);

    if (out != null)
        out.close();

    return resultCount;
}

From source file: edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License: Open Source License

/**
 * Compute the Delaunay triangulation on the local machine
 * @param inPaths
 * @param outPath
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            try {
                int numPoints = 0;
                for (int i = i1; i < i2; i++) {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points.add(p.clone());
                        }
                    }
                    reader.close();
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                }
                return numPoints;
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    if (params.getBoolean("dedup", true)) {
        float threshold = params.getFloat("threshold", 1E-5f);
        allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
    }

    LOG.info("Computing DT for " + allPoints.length + " points");
    GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
    LOG.info("DT computed");

    Rectangle mbr = FileMBR.fileMBR(inPaths, params);
    double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
    Rectangle bigMBR = mbr.buffer(buffer, buffer);
    if (outPath != null && params.getBoolean("output", true)) {
        LOG.info("Writing the output as a soup of triangles");
        Triangulation answer = dtAlgorithm.getFinalTriangulation();
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream out = new PrintStream(outFS.create(outPath));

        Text text = new Text2();
        byte[] tab = "\t".getBytes();
        for (Point[] triangle : answer.iterateTriangles()) {
            text.clear();
            triangle[0].toText(text);
            text.append(tab, 0, tab.length);
            triangle[1].toText(text);
            text.append(tab, 0, tab.length);
            triangle[2].toText(text);
            out.println(text);
        }
        out.close();
    }

    //    dtAlgorithm.getFinalTriangulation().draw();
    //Triangulation finalPart = new Triangulation();
    //Triangulation nonfinalPart = new Triangulation();
    //dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart);
}

From source file: edu.umn.cs.spatialHadoop.indexing.Indexer.java

License: Open Source License

private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(params);
    final Configuration conf = job.getConfiguration();

    final String sindex = conf.get("sindex");

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    FileSystem inFs = inPath.getFileSystem(conf);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {
        SpatialInputFormat3.setInputPaths(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job))
            splits.add(s);
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = PartitionerReplicate.get(sindex);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    setLocalIndexer(conf, sindex);
    final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, conf);
    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
        if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, conf);
        } else {
            throw new RuntimeException("Unknown record reader");
        }

        final IntWritable partitionID = new IntWritable();

        while (reader.nextKeyValue()) {
            Iterable<Shape> shapes = reader.getCurrentValue();
            if (replicate) {
                for (final Shape s : shapes) {
                    partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                        @Override
                        public void collect(Integer id) {
                            partitionID.set(id);
                            try {
                                recordWriter.write(partitionID, s);
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
                }
            } else {
                for (final Shape s : shapes) {
                    int pid = partitioner.overlapPartition(s);
                    if (pid != -1) {
                        partitionID.set(pid);
                        recordWriter.write(partitionID, s);
                    }
                }
            }
        }
        reader.close();
    }
    recordWriter.close(null);

    // Write the WKT formatted master file
    Path masterPath = new Path(outPath, "_master." + sindex);
    FileSystem outFs = outPath.getFileSystem(params);
    Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
    PrintStream wktOut = new PrintStream(outFs.create(wktPath));
    wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
    Text tempLine = new Text2();
    Partition tempPartition = new Partition();
    LineReader in = new LineReader(outFs.open(masterPath));
    while (in.readLine(tempLine) > 0) {
        tempPartition.fromText(tempLine);
        wktOut.println(tempPartition.toWKT());
    }
    in.close();
    wktOut.close();
}

From source file: edu.umn.cs.spatialHadoop.operations.ClosestPair.java

License: Open Source License

/**
 * Computes the closest pair using a local single-machine algorithm
 * (no MapReduce)
 * @param inPaths
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static Pair closestPairLocal(Path[] inPaths, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            int numPoints = 0;
            for (int i = i1; i < i2; i++) {
                try {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points.add(p.clone());
                        }
                    }
                    reader.close();
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                } catch (IOException e) {
                    throw new RuntimeException("Error reading file", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Error reading file", e);
                }
            }
            return numPoints;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    LOG.info("Computing closest-pair for " + allPoints.length + " points");
    Pair closestPair = closestPairInMemory(allPoints, params.getInt(BruteForceThreshold, 100));
    return closestPair;
}

From source file: edu.umn.cs.spatialHadoop.operations.ConvexHull.java

License: Open Source License

/**
 * Computes the convex hull of an input file using a single machine algorithm.
 * The output is written to the output file. If output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void convexHullLocal(Path inFile, Path outFile, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFile);
    final List<InputSplit> splits = inputFormat.getSplits(job);

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
        @Override
        public Point[] run(int i1, int i2) {
            try {
                List<Point> finalPoints = new ArrayList<Point>();
                final int MaxSize = 100000;
                Point[] points = new Point[MaxSize];
                int size = 0;
                for (int i = i1; i < i2; i++) {
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                            .get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points[size++] = p.clone();
                            if (size >= points.length) {
                                // Perform convex hull and write the result to finalPoints
                                Point[] chPoints = convexHullInMemory(points);
                                for (Point chPoint : chPoints)
                                    finalPoints.add(chPoint);
                                size = 0; // reset
                            }
                        }
                    }
                    reader.close();
                }
                while (size-- > 0)
                    finalPoints.add(points[size]);
                return finalPoints.toArray(new Point[finalPoints.size()]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Point[] list : allLists)
        totalNumPoints += list.length;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (Point[] list : allLists) {
        System.arraycopy(list, 0, allPoints, pointer, list.length);
        pointer += list.length;
    }
    allLists.clear(); // To let the GC collect it

    Point[] ch = convexHullInMemory(allPoints);

    if (outFile != null) {
        if (params.getBoolean("overwrite", false)) {
            FileSystem outFs = outFile.getFileSystem(new Configuration());
            outFs.delete(outFile, true);
        }
        GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
        for (Point pt : ch) {
            out.write(NullWritable.get(), pt);
        }
        out.close(null);
    }
}

From source file: edu.umn.cs.spatialHadoop.operations.FarthestPair.java

License: Open Source License

public static PairDistance farthestPairLocal(Path[] inPaths, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<org.apache.hadoop.mapreduce.InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            try {
                int numPoints = 0;
                for (int i = i1; i < i2; i++) {
                    List<Point> points = new ArrayList<Point>();
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                            .get(i);
                    final org.apache.hadoop.mapreduce.RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points.add(p.clone());
                        }
                    }
                    reader.close();
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                }
                return numPoints;
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    LOG.info("Computing closest-pair for " + allPoints.length + " points");
    long t1 = System.currentTimeMillis();
    Point[] convexHull = ConvexHull.convexHullInMemory(allPoints);
    long t2 = System.currentTimeMillis();
    PairDistance farthestPair = rotatingCallipers(convexHull);
    long t3 = System.currentTimeMillis();
    System.out.println("Convex hull in " + (t2 - t1) / 1000.0 + " seconds " + "and rotating calipers in "
            + (t3 - t2) / 1000.0 + " seconds");
    return farthestPair;
}

From source file: edu.umn.cs.spatialHadoop.operations.KNN.java

License: Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNN.ShapeWithDistance<Partition>>() {
            {
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file: edu.umn.cs.spatialHadoop.operations.RangeQuery.java

License: Open Source License

/**
 * Runs a range query on the local machine (no MapReduce) and the output is
 * streamed to the provided result collector. The query might run in parallel
 * which makes it necessary to design the result collector to accept parallel
 * calls to the method {@link ResultCollector#collect(Object)}.
 * You can use {@link ResultCollectorSynchronizer} to synchronize calls to
 * your ResultCollector if you cannot design yours to be thread safe.
 * @param inPath
 * @param queryRange
 * @param shape
 * @param params
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static <S extends Shape> long rangeQueryLocal(Path inPath, final Shape queryRange, final S shape,
        final OperationsParams params, final ResultCollector<S> output)
        throws IOException, InterruptedException {
    // Set MBR of query shape in job configuration to work with the spatial filter
    OperationsParams.setShape(params, SpatialInputFormat3.InputQueryRange, queryRange.getMBR());
    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);

    // 2- Process splits in parallel
    List<Long> results = Parallel.forEach(splits.size(), new RunnableRange<Long>() {
        @Override
        public Long run(int i1, int i2) {
            long results = 0;
            for (int i = i1; i < i2; i++) {
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (Shape s : shapes) {
                            results++;
                            if (output != null)
                                output.collect((S) s);
                        }
                    }
                    reader.close();
                } catch (IOException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            return results;
        }
    });
    long totalResultSize = 0;
    for (long result : results)
        totalResultSize += result;
    return totalResultSize;
}
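
The javadoc above warns that rangeQueryLocal may invoke the collector from several threads in parallel. Below is a minimal sketch of a thread-safe collector, assuming an illustrative caller-side list named collected; the codebase also offers ResultCollectorSynchronizer to wrap an existing collector for the same purpose.

final List<Shape> collected = new ArrayList<Shape>();
ResultCollector<Shape> output = new ResultCollector<Shape>() {
    @Override
    public void collect(Shape s) {
        // collect may be called concurrently, so guard the shared list
        synchronized (collected) {
            collected.add(s.clone());
        }
    }
};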

From source file: edu.umn.cs.spatialHadoop.operations.Skyline.java

License: Open Source License

/**
 * Computes the skyline of an input file using a single machine algorithm.
 * The output is written to the output file. If output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void skylineLocal(Path inFile, Path outFile, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFile);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Direction dir = params.getDirection("dir", Direction.MaxMax);

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
        @Override
        public Point[] run(int i1, int i2) {
            try {
                List<Point> finalPoints = new ArrayList<Point>();
                final int MaxSize = 100000;
                Point[] points = new Point[MaxSize];
                int size = 0;
                for (int i = i1; i < i2; i++) {
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                            .get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points[size++] = p.clone();
                            if (size >= points.length) {
                                // Perform Skyline and write the result to finalPoints
                                Point[] skylinePoints = skylineInMemory(points, dir);
                                for (Point skylinePoint : skylinePoints)
                                    finalPoints.add(skylinePoint);
                                size = 0; // reset
                            }
                        }
                    }
                    reader.close();
                }
                while (size-- > 0)
                    finalPoints.add(points[size]);
                return finalPoints.toArray(new Point[finalPoints.size()]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Point[] list : allLists)
        totalNumPoints += list.length;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (Point[] list : allLists) {
        System.arraycopy(list, 0, allPoints, pointer, list.length);
        pointer += list.length;
    }
    allLists.clear(); // To let the GC collect it

    Point[] skyline = skylineInMemory(allPoints, dir);

    if (outFile != null) {
        if (params.getBoolean("overwrite", false)) {
            FileSystem outFs = outFile.getFileSystem(new Configuration());
            outFs.delete(outFile, true);
        }
        GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
        for (Point pt : skyline) {
            out.write(NullWritable.get(), pt);
        }
        out.close(null);
    }
}

From source file: edu.umn.cs.spatialHadoop.operations.Union.java

License: Open Source License

private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());

    // 2- Process splits in parallel
    final List<Float> progresses = new Vector<Float>();
    final IntWritable overallProgress = new IntWritable(0);
    List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() {
        @Override
        public List<Geometry> run(final int i1, final int i2) {
            final int pi;
            final IntWritable splitsProgress = new IntWritable();
            synchronized (progresses) {
                pi = progresses.size();
                progresses.add(0f);
            }
            final float progressRatio = (i2 - i1) / (float) splits.size();
            Progressable progress = new Progressable.NullProgressable() {
                @Override
                public void progress(float p) {
                    progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio);
                    float sum = 0;
                    for (float f : progresses)
                        sum += f;
                    int newProgress = (int) (sum * 100);
                    if (newProgress > overallProgress.get()) {
                        overallProgress.set(newProgress);
                        LOG.info("Local union progress " + newProgress + "%");
                    }
                }
            };

            final List<Geometry> localUnion = new ArrayList<Geometry>();
            ResultCollector<Geometry> output = new ResultCollector<Geometry>() {
                @Override
                public void collect(Geometry r) {
                    localUnion.add(r);
                }
            };

            final int MaxBatchSize = 100000;
            Geometry[] batch = new Geometry[MaxBatchSize];
            int batchSize = 0;
            for (int i = i1; i < i2; i++) {
                splitsProgress.set(i);
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (S s : shapes) {
                            if (s.geom == null)
                                continue;
                            batch[batchSize++] = s.geom;
                            if (batchSize >= MaxBatchSize) {
                                SpatialAlgorithms.multiUnion(batch, progress, output);
                                batchSize = 0;
                            }
                        }
                    }
                    reader.close();
                } catch (IOException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            // Union all remaining geometries
            try {
                Geometry[] finalBatch = new Geometry[batchSize];
                System.arraycopy(batch, 0, finalBatch, 0, batchSize);
                SpatialAlgorithms.multiUnion(finalBatch, progress, output);
                return localUnion;
            } catch (IOException e) {
                // Should never happen as the context is passed as null
                throw new RuntimeException("Error in local union", e);
            }
        }
    }, parallelism);

    // Write result to output
    LOG.info("Merge the results of all splits");
    int totalNumGeometries = 0;
    for (List<Geometry> result : results)
        totalNumGeometries += result.size();
    List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries);
    for (List<Geometry> result : results)
        allInOne.addAll(result);

    final S outShape = (S) params.getShape("shape");
    final PrintStream out;
    if (outPath == null || !params.getBoolean("output", true)) {
        // Skip writing the output
        out = new PrintStream(new NullOutputStream());
    } else {
        FileSystem outFS = outPath.getFileSystem(params);
        out = new PrintStream(outFS.create(outPath));
    }

    SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]),
            new Progressable.NullProgressable() {
                int lastProgress = 0;

                @Override
                public void progress(float p) {
                    int newProgress = (int) (p * 100);
                    if (newProgress > lastProgress) {
                        LOG.info("Global union progress " + (lastProgress = newProgress) + "%");
                    }
                }
            }, new ResultCollector<Geometry>() {
                Text line = new Text2();

                @Override
                public void collect(Geometry r) {
                    outShape.geom = r;
                    outShape.toText(line);
                    out.println(line);
                }
            });
    out.close();
}