List of usage examples for org.apache.hadoop.mapreduce.RecordReader.close()
public abstract void close() throws IOException;
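Every example below follows the same pattern: obtain a RecordReader from an InputFormat, initialize it, iterate with nextKeyValue(), and call close() once the split is consumed. Before the full examples, here is a minimal, self-contained sketch of that pattern. It is not taken from the sources below; the input path, the use of TextInputFormat, and the class name are illustrative placeholders. Closing the reader in a finally block guarantees file handles are released even if reading fails part-way.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderCloseSketch {
  public static void main(String[] args) throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    FileInputFormat.addInputPath(job, new Path("input.txt")); // hypothetical input path
    TextInputFormat inputFormat = new TextInputFormat();
    // A dummy task attempt context is enough when reading outside a real task
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    for (InputSplit split : inputFormat.getSplits(job)) {
      RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
      try {
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
          System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
        }
      } finally {
        reader.close(); // release file handles even if reading fails
      }
    }
  }
}

The SpatialHadoop examples below pass null for the TaskAttemptContext and instead call a reader-specific initialize(split, params) overload, but the close() at the end of each split loop plays the same role.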
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
    throws IOException, InterruptedException {
  // Read the inputs and store them in memory
  List<Shape>[] datasets = new List[inFiles.length];
  final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  for (int i = 0; i < inFiles.length; i++) {
    datasets[i] = new ArrayList<Shape>();
    FileSystem inFs = inFiles[i].getFileSystem(params);
    Job job = Job.getInstance(params);
    SpatialInputFormat3.addInputPath(job, inFiles[i]);
    for (InputSplit split : inputFormat.getSplits(job)) {
      FileSplit fsplit = (FileSplit) split;
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(fsplit, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          datasets[i].add(shape.clone());
        }
      }
      reader.close();
    }
  }
  // Apply the spatial join algorithm
  ResultCollector2<Shape, Shape> output = null;
  PrintStream out = null;
  if (outFile != null) {
    FileSystem outFS = outFile.getFileSystem(params);
    out = new PrintStream(outFS.create(outFile));
    final PrintStream outout = out;
    output = new ResultCollector2<Shape, Shape>() {
      @Override
      public void collect(Shape r, Shape s) {
        outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
      }
    };
  }
  long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);
  if (out != null)
    out.close();
  return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/**
 * Compute the Delaunay triangulation on the local machine.
 * @param inPaths
 * @param outPath
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      try {
        int numPoints = 0;
        for (int i = i1; i < i2; i++) {
          List<Point> points = new ArrayList<Point>();
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        }
        return numPoints;
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  if (params.getBoolean("dedup", true)) {
    float threshold = params.getFloat("threshold", 1E-5f);
    allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
  }
  LOG.info("Computing DT for " + allPoints.length + " points");
  GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
  LOG.info("DT computed");
  Rectangle mbr = FileMBR.fileMBR(inPaths, params);
  double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
  Rectangle bigMBR = mbr.buffer(buffer, buffer);
  if (outPath != null && params.getBoolean("output", true)) {
    LOG.info("Writing the output as a soup of triangles");
    Triangulation answer = dtAlgorithm.getFinalTriangulation();
    FileSystem outFS = outPath.getFileSystem(params);
    PrintStream out = new PrintStream(outFS.create(outPath));
    Text text = new Text2();
    byte[] tab = "\t".getBytes();
    for (Point[] triangle : answer.iterateTriangles()) {
      text.clear();
      triangle[0].toText(text);
      text.append(tab, 0, tab.length);
      triangle[1].toText(text);
      text.append(tab, 0, tab.length);
      triangle[2].toText(text);
      out.println(text);
    }
    out.close();
  }
  // dtAlgorithm.getFinalTriangulation().draw();
  // Triangulation finalPart = new Triangulation();
  // Triangulation nonfinalPart = new Triangulation();
  // dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart);
}
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
    throws IOException, InterruptedException {
  Job job = Job.getInstance(params);
  final Configuration conf = job.getConfiguration();
  final String sindex = conf.get("sindex");
  // Start reading input file
  List<InputSplit> splits = new ArrayList<InputSplit>();
  final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  FileSystem inFs = inPath.getFileSystem(conf);
  FileStatus inFStatus = inFs.getFileStatus(inPath);
  if (inFStatus != null && !inFStatus.isDir()) {
    // One file, retrieve it immediately.
    // This is useful if the input is a hidden file which is automatically
    // skipped by FileInputFormat. We need to plot a hidden file for the case
    // of plotting partition boundaries of a spatial index
    splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
  } else {
    SpatialInputFormat3.setInputPaths(job, inPath);
    for (InputSplit s : inputFormat.getSplits(job))
      splits.add(s);
  }
  // Copy splits to a final array to be used in parallel
  final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
  boolean replicate = PartitionerReplicate.get(sindex);
  // Set input file MBR if not already set
  Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
  if (inputMBR == null) {
    inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
    OperationsParams.setShape(conf, "mbr", inputMBR);
  }
  setLocalIndexer(conf, sindex);
  final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);
  final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
      outPath, conf);
  for (FileSplit fsplit : fsplits) {
    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
    if (reader instanceof SpatialRecordReader3) {
      ((SpatialRecordReader3) reader).initialize(fsplit, conf);
    } else if (reader instanceof RTreeRecordReader3) {
      ((RTreeRecordReader3) reader).initialize(fsplit, conf);
    } else if (reader instanceof HDFRecordReader) {
      ((HDFRecordReader) reader).initialize(fsplit, conf);
    } else {
      throw new RuntimeException("Unknown record reader");
    }
    final IntWritable partitionID = new IntWritable();
    while (reader.nextKeyValue()) {
      Iterable<Shape> shapes = reader.getCurrentValue();
      if (replicate) {
        for (final Shape s : shapes) {
          partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
            @Override
            public void collect(Integer id) {
              partitionID.set(id);
              try {
                recordWriter.write(partitionID, s);
              } catch (IOException e) {
                throw new RuntimeException(e);
              }
            }
          });
        }
      } else {
        for (final Shape s : shapes) {
          int pid = partitioner.overlapPartition(s);
          if (pid != -1) {
            partitionID.set(pid);
            recordWriter.write(partitionID, s);
          }
        }
      }
    }
    reader.close();
  }
  recordWriter.close(null);
  // Write the WKT formatted master file
  Path masterPath = new Path(outPath, "_master." + sindex);
  FileSystem outFs = outPath.getFileSystem(params);
  Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
  PrintStream wktOut = new PrintStream(outFs.create(wktPath));
  wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
  Text tempLine = new Text2();
  Partition tempPartition = new Partition();
  LineReader in = new LineReader(outFs.open(masterPath));
  while (in.readLine(tempLine) > 0) {
    tempPartition.fromText(tempLine);
    wktOut.println(tempPartition.toWKT());
  }
  in.close();
  wktOut.close();
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License:Open Source License
/**
 * Computes the closest pair using a local single-machine algorithm
 * (no MapReduce).
 * @param inPaths
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static Pair closestPairLocal(Path[] inPaths, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      int numPoints = 0;
      for (int i = i1; i < i2; i++) {
        try {
          List<Point> points = new ArrayList<Point>();
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        } catch (IOException e) {
          throw new RuntimeException("Error reading file", e);
        } catch (InterruptedException e) {
          throw new RuntimeException("Error reading file", e);
        }
      }
      return numPoints;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  LOG.info("Computing closest-pair for " + allPoints.length + " points");
  Pair closestPair = closestPairInMemory(allPoints, params.getInt(BruteForceThreshold, 100));
  return closestPair;
}
From source file:edu.umn.cs.spatialHadoop.operations.ConvexHull.java
License:Open Source License
/**
 * Computes the convex hull of an input file using a single machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void convexHullLocal(Path inFile, Path outFile, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inFile);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
    @Override
    public Point[] run(int i1, int i2) {
      try {
        List<Point> finalPoints = new ArrayList<Point>();
        final int MaxSize = 100000;
        Point[] points = new Point[MaxSize];
        int size = 0;
        for (int i = i1; i < i2; i++) {
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points[size++] = p.clone();
              if (size >= points.length) {
                // Perform convex hull and write the result to finalPoints
                Point[] chPoints = convexHullInMemory(points);
                for (Point skylinePoint : chPoints)
                  finalPoints.add(skylinePoint);
                size = 0; // reset
              }
            }
          }
          reader.close();
        }
        while (size-- > 0)
          finalPoints.add(points[size]);
        return finalPoints.toArray(new Point[finalPoints.size()]);
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (Point[] list : allLists)
    totalNumPoints += list.length;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (Point[] list : allLists) {
    System.arraycopy(list, 0, allPoints, pointer, list.length);
    pointer += list.length;
  }
  allLists.clear(); // To let the GC collect it
  Point[] ch = convexHullInMemory(allPoints);
  if (outFile != null) {
    if (params.getBoolean("overwrite", false)) {
      FileSystem outFs = outFile.getFileSystem(new Configuration());
      outFs.delete(outFile, true);
    }
    GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
    for (Point pt : ch) {
      out.write(NullWritable.get(), pt);
    }
    out.close(null);
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.FarthestPair.java
License:Open Source License
public static PairDistance farthestPairLocal(Path[] inPaths, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPaths);
  final List<org.apache.hadoop.mapreduce.InputSplit> splits = inputFormat.getSplits(job);
  final Point[][] allLists = new Point[splits.size()][];
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
    @Override
    public Integer run(int i1, int i2) {
      try {
        int numPoints = 0;
        for (int i = i1; i < i2; i++) {
          List<Point> points = new ArrayList<Point>();
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final org.apache.hadoop.mapreduce.RecordReader<Rectangle, Iterable<Point>> reader =
              inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points.add(p.clone());
            }
          }
          reader.close();
          numPoints += points.size();
          allLists[i] = points.toArray(new Point[points.size()]);
        }
        return numPoints;
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (int numPoints : numsPoints)
    totalNumPoints += numPoints;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (int iList = 0; iList < allLists.length; iList++) {
    System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
    pointer += allLists[iList].length;
    allLists[iList] = null; // To let the GC collect it
  }
  LOG.info("Computing farthest-pair for " + allPoints.length + " points");
  long t1 = System.currentTimeMillis();
  Point[] convexHull = ConvexHull.convexHullInMemory(allPoints);
  long t2 = System.currentTimeMillis();
  PairDistance farthestPair = rotatingCallipers(convexHull);
  long t3 = System.currentTimeMillis();
  System.out.println("Convex hull in " + (t2 - t1) / 1000.0 + " seconds "
      + "and rotating calipers in " + (t3 - t2) / 1000.0 + " seconds");
  return farthestPair;
}
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
    throws IOException, InterruptedException {
  int iterations = 0;
  FileSystem fs = inFile.getFileSystem(params);
  Point queryPoint = (Point) OperationsParams.getShape(params, "point");
  int k = params.getInt("k", 1);
  // Top-k objects are retained in this object
  PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);
  SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
  final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
  double kthDistance = Double.MAX_VALUE;
  if (gIndex != null) {
    // There is a global index, use it
    PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess =
        new PriorityQueue<KNN.ShapeWithDistance<Partition>>() {
          {
            initialize(gIndex.size());
          }

          @Override
          protected boolean lessThan(Object a, Object b) {
            return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
          }
        };
    for (Partition p : gIndex) {
      double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
      partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
    }
    while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {
      ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
      // Process this partition
      Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
      long length = fs.getFileStatus(partitionPath).getLen();
      FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(fsplit, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(fsplit, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      iterations++;
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
          if (distance <= kthDistance)
            knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
        }
      }
      reader.close();
      if (knn.size() >= k)
        kthDistance = knn.top().distance;
    }
  } else {
    // No global index, have to scan the whole file
    Job job = new Job(params);
    SpatialInputFormat3.addInputPath(job, inFile);
    List<InputSplit> splits = inputFormat.getSplits(job);
    for (InputSplit split : splits) {
      RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
      if (reader instanceof SpatialRecordReader3) {
        ((SpatialRecordReader3) reader).initialize(split, params);
      } else if (reader instanceof RTreeRecordReader3) {
        ((RTreeRecordReader3) reader).initialize(split, params);
      } else if (reader instanceof HDFRecordReader) {
        ((HDFRecordReader) reader).initialize(split, params);
      } else {
        throw new RuntimeException("Unknown record reader");
      }
      iterations++;
      while (reader.nextKeyValue()) {
        Iterable<Shape> shapes = reader.getCurrentValue();
        for (Shape shape : shapes) {
          double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
          knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
        }
      }
      reader.close();
    }
    if (knn.size() >= k)
      kthDistance = knn.top().distance;
  }
  long resultCount = knn.size();
  if (outPath != null && params.getBoolean("output", true)) {
    FileSystem outFS = outPath.getFileSystem(params);
    PrintStream ps = new PrintStream(outFS.create(outPath));
    Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
    resultsOrdered.setSize((int) resultCount);
    while (knn.size() > 0) {
      ShapeWithDistance<S> nextAnswer = knn.pop();
      resultsOrdered.set(knn.size(), nextAnswer);
    }
    Text text = new Text();
    for (ShapeWithDistance<S> answer : resultsOrdered) {
      text.clear();
      TextSerializerHelper.serializeDouble(answer.distance, text, ',');
      answer.shape.toText(text);
      ps.println(text);
    }
    ps.close();
  }
  TotalIterations.addAndGet(iterations);
  return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License:Open Source License
/**
 * Runs a range query on the local machine (no MapReduce) and the output is
 * streamed to the provided result collector. The query might run in parallel
 * which makes it necessary to design the result collector to accept parallel
 * calls to the method {@link ResultCollector#collect(Object)}.
 * You can use {@link ResultCollectorSynchronizer} to synchronize calls to
 * your ResultCollector if you cannot design yours to be thread safe.
 * @param inPath
 * @param queryRange
 * @param shape
 * @param params
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static <S extends Shape> long rangeQueryLocal(Path inPath, final Shape queryRange, final S shape,
    final OperationsParams params, final ResultCollector<S> output) throws IOException, InterruptedException {
  // Set MBR of query shape in job configuration to work with the spatial filter
  OperationsParams.setShape(params, SpatialInputFormat3.InputQueryRange, queryRange.getMBR());
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPath);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  // 2- Process splits in parallel
  List<Long> results = Parallel.forEach(splits.size(), new RunnableRange<Long>() {
    @Override
    public Long run(int i1, int i2) {
      long results = 0;
      for (int i = i1; i < i2; i++) {
        try {
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<S> shapes = reader.getCurrentValue();
            for (Shape s : shapes) {
              results++;
              if (output != null)
                output.collect((S) s);
            }
          }
          reader.close();
        } catch (IOException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        } catch (InterruptedException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        }
      }
      return results;
    }
  });
  long totalResultSize = 0;
  for (long result : results)
    totalResultSize += result;
  return totalResultSize;
}
From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java
License:Open Source License
/**
 * Computes the skyline of an input file using a single machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void skylineLocal(Path inFile, Path outFile, final OperationsParams params)
    throws IOException, InterruptedException {
  if (params.getBoolean("mem", false))
    MemoryReporter.startReporting();
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inFile);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  final Direction dir = params.getDirection("dir", Direction.MaxMax);
  // 2- Read all input points in memory
  LOG.info("Reading points from " + splits.size() + " splits");
  List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
    @Override
    public Point[] run(int i1, int i2) {
      try {
        List<Point> finalPoints = new ArrayList<Point>();
        final int MaxSize = 100000;
        Point[] points = new Point[MaxSize];
        int size = 0;
        for (int i = i1; i < i2; i++) {
          org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
              (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<Point> pts = reader.getCurrentValue();
            for (Point p : pts) {
              points[size++] = p.clone();
              if (size >= points.length) {
                // Perform skyline and write the result to finalPoints
                Point[] skylinePoints = skylineInMemory(points, dir);
                for (Point skylinePoint : skylinePoints)
                  finalPoints.add(skylinePoint);
                size = 0; // reset
              }
            }
          }
          reader.close();
        }
        while (size-- > 0)
          finalPoints.add(points[size]);
        return finalPoints.toArray(new Point[finalPoints.size()]);
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      return null;
    }
  }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));
  int totalNumPoints = 0;
  for (Point[] list : allLists)
    totalNumPoints += list.length;
  LOG.info("Read " + totalNumPoints + " points and merging into one list");
  Point[] allPoints = new Point[totalNumPoints];
  int pointer = 0;
  for (Point[] list : allLists) {
    System.arraycopy(list, 0, allPoints, pointer, list.length);
    pointer += list.length;
  }
  allLists.clear(); // To let the GC collect it
  Point[] skyline = skylineInMemory(allPoints, dir);
  if (outFile != null) {
    if (params.getBoolean("overwrite", false)) {
      FileSystem outFs = outFile.getFileSystem(new Configuration());
      outFs.delete(outFile, true);
    }
    GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
    for (Point pt : skyline) {
      out.write(NullWritable.get(), pt);
    }
    out.close(null);
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.Union.java
License:Open Source License
private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  // 1- Split the input path/file to get splits that can be processed independently
  final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
  Job job = Job.getInstance(params);
  SpatialInputFormat3.setInputPaths(job, inPath);
  final List<InputSplit> splits = inputFormat.getSplits(job);
  int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
  // 2- Process splits in parallel
  final List<Float> progresses = new Vector<Float>();
  final IntWritable overallProgress = new IntWritable(0);
  List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() {
    @Override
    public List<Geometry> run(final int i1, final int i2) {
      final int pi;
      final IntWritable splitsProgress = new IntWritable();
      synchronized (progresses) {
        pi = progresses.size();
        progresses.add(0f);
      }
      final float progressRatio = (i2 - i1) / (float) splits.size();
      Progressable progress = new Progressable.NullProgressable() {
        @Override
        public void progress(float p) {
          progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio);
          float sum = 0;
          for (float f : progresses)
            sum += f;
          int newProgress = (int) (sum * 100);
          if (newProgress > overallProgress.get()) {
            overallProgress.set(newProgress);
            LOG.info("Local union progress " + newProgress + "%");
          }
        }
      };
      final List<Geometry> localUnion = new ArrayList<Geometry>();
      ResultCollector<Geometry> output = new ResultCollector<Geometry>() {
        @Override
        public void collect(Geometry r) {
          localUnion.add(r);
        }
      };
      final int MaxBatchSize = 100000;
      Geometry[] batch = new Geometry[MaxBatchSize];
      int batchSize = 0;
      for (int i = i1; i < i2; i++) {
        splitsProgress.set(i);
        try {
          FileSplit fsplit = (FileSplit) splits.get(i);
          final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit, null);
          if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, params);
          } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, params);
          } else {
            throw new RuntimeException("Unknown record reader");
          }
          while (reader.nextKeyValue()) {
            Iterable<S> shapes = reader.getCurrentValue();
            for (S s : shapes) {
              if (s.geom == null)
                continue;
              batch[batchSize++] = s.geom;
              if (batchSize >= MaxBatchSize) {
                SpatialAlgorithms.multiUnion(batch, progress, output);
                batchSize = 0;
              }
            }
          }
          reader.close();
        } catch (IOException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        } catch (InterruptedException e) {
          LOG.error("Error processing split " + splits.get(i), e);
        }
      }
      // Union all remaining geometries
      try {
        Geometry[] finalBatch = new Geometry[batchSize];
        System.arraycopy(batch, 0, finalBatch, 0, batchSize);
        SpatialAlgorithms.multiUnion(finalBatch, progress, output);
        return localUnion;
      } catch (IOException e) {
        // Should never happen as the context is passed as null
        throw new RuntimeException("Error in local union", e);
      }
    }
  }, parallelism);
  // Write result to output
  LOG.info("Merge the results of all splits");
  int totalNumGeometries = 0;
  for (List<Geometry> result : results)
    totalNumGeometries += result.size();
  List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries);
  for (List<Geometry> result : results)
    allInOne.addAll(result);
  final S outShape = (S) params.getShape("shape");
  final PrintStream out;
  if (outPath == null || !params.getBoolean("output", true)) {
    // Skip writing the output
    out = new PrintStream(new NullOutputStream());
  } else {
    FileSystem outFS = outPath.getFileSystem(params);
    out = new PrintStream(outFS.create(outPath));
  }
  SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]),
      new Progressable.NullProgressable() {
        int lastProgress = 0;

        public void progress(float p) {
          int newProgress = (int) (p * 100);
          if (newProgress > lastProgress) {
            LOG.info("Global union progress " + (lastProgress = newProgress) + "%");
          }
        }
      }, new ResultCollector<Geometry>() {
        Text line = new Text2();

        @Override
        public void collect(Geometry r) {
          outShape.geom = r;
          outShape.toText(line);
          out.println(line);
        }
      });
  out.close();
}