List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
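Path.getName() returns the final component of the path, i.e. everything after the last '/'; the parent directories and the scheme/authority are stripped. Most of the examples below call it inside a PathFilter to select files by their base name. A minimal standalone sketch of the call in isolation (the path string is illustrative only):

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() keeps only the last path component
        Path p = new Path("hdfs://namenode:8020/data/2015.01.01/h21v06.hdf");
        System.out.println(p.getName());             // prints: h21v06.hdf
        System.out.println(p.getParent().getName()); // prints: 2015.01.01
    }
}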
From source file:edu.umn.cs.spatialHadoop.nasa.MakeHDFVideo.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    if (!params.checkInputOutput()) {
        System.exit(1);
    }
    //Path input = params.getPaths()[0];
    Path output = params.getPaths()[1];
    boolean recoverHoles = params.is("recoverholes");
    boolean addDate = params.is("adddate");

    Vector<String> vargs = new Vector<String>(Arrays.asList(args));
    Rectangle plotRange = (Rectangle) params.getShape("rect");
    if (plotRange != null && recoverHoles) {
        // Extend the plot range to improve the quality of RecoverHoles
        for (int i = 0; i < vargs.size();) {
            if (vargs.get(i).startsWith("rect:") || vargs.get(i).startsWith("mbr:")
                    || vargs.get(i).startsWith("width:") || vargs.get(i).startsWith("height:")) {
                vargs.remove(i);
            } else {
                i++;
            }
        }
        double w = plotRange.getWidth();
        double h = plotRange.getHeight();
        plotRange = plotRange.buffer(w / 2, h / 2);
        int new_width = params.getInt("width", 1000) * 2;
        int new_height = params.getInt("height", 1000) * 2;
        vargs.add(plotRange.toText(new Text("rect:")).toString());
        vargs.add("width:" + new_width);
        vargs.add("height:" + new_height);
    }

    // 1- Call HDF plot to generate all images
    HDFPlot.main(vargs.toArray(new String[vargs.size()]));

    // 2- Call RecoverHoles to recover holes (surprise)
    if (recoverHoles) {
        RecoverHoles.recoverInterpolationDir(output);
        if (plotRange != null) {
            // Need to crop all images to restore the original selection
            cropImages(output, (Rectangle) params.getShape("rect"), plotRange);
        }
    }

    if (addDate) {
        RecoverHoles.addDate(output);
    }

    FileSystem outFs = output.getFileSystem(params);
    FileStatus[] generatedImages = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().toLowerCase().endsWith(".png");
        }
    });

    if (generatedImages.length == 0) {
        Log.warn("No generated images");
        System.exit(1);
    }

    InputStream inStream = outFs.open(generatedImages[0].getPath());
    BufferedImage firstImage = ImageIO.read(inStream);
    inStream.close();
    int imageWidth = firstImage.getWidth();
    int imageHeight = firstImage.getHeight();

    String scaleRangeStr = params.get("scale-range");
    if (scaleRangeStr != null) {
        String[] parts = scaleRangeStr.split("\\.\\.");
        MinMax scaleRange = new MinMax();
        scaleRange.minValue = Integer.parseInt(parts[0]);
        scaleRange.maxValue = Integer.parseInt(parts[1]);
        HDFPlot2.drawScale(new Path(output, "scale.png"), scaleRange, 64, imageHeight);
    }

    InputStream logoInputStream = MakeHDFVideo.class.getResourceAsStream("/gistic_logo.png");
    OutputStream logoOutputStream = outFs.create(new Path(output, "gistic_logo.png"));
    byte[] buffer = new byte[4096];
    int size = 0;
    while ((size = logoInputStream.read(buffer)) > 0) {
        logoOutputStream.write(buffer, 0, size);
    }
    logoOutputStream.close();

    // Rename files to be ready to use with ffmpeg
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");
        }
    });
    // Sort by base name; the yyyy.MM.dd names sort chronologically
    Arrays.sort(all_images, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return f1.getPath().getName().compareTo(f2.getPath().getName());
        }
    });

    int day = 1;
    for (FileStatus image : all_images) {
        String newFileName = String.format("day_%03d.png", day++);
        outFs.rename(image.getPath(), new Path(output, newFileName));
    }

    // Plot the overlay image
    Path overlay = params.get("overlay") == null ? null : new Path(params.get("overlay"));
    if (overlay != null) {
        // Draw an overlay image
        OperationsParams plotParams = new OperationsParams(params);
        // Keep all arguments except input and output, which change for each call
        // to Plot or PlotPyramid
        plotParams.clearAllPaths();
        Path overlayOutput = new Path(output, "overlay.png");
        plotParams.setClass("shape", OSMPolygon.class, Shape.class);
        GeometricPlot.plot(new Path[] { overlay }, overlayOutput, plotParams);
    }

    String video_command;
    if (overlay != null) {
        video_command = "avconv -r 4 -i day_%3d.png " + "-vf \"movie=gistic_logo.png [watermark]; "
                + "movie=overlay.png [ways]; " + "movie=scale.png [scale]; " + "[in] crop="
                + plotRange.getWidth() + ":" + plotRange.getHeight() + "[in]; " + "[ways] crop="
                + plotRange.getWidth() + ":" + plotRange.getHeight() + "[ways]; "
                + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; "
                + "[mid][ways] overlay=0:0 [mid2]; " + "[mid2] pad=iw+64:ih [mid3]; "
                + "[mid3][scale] overlay=main_w-overlay_w:0 [out]\" " + "-r 4 -pix_fmt yuv420p output.mp4 ";
    } else {
        video_command = "avconv -r 4 -i day_%3d.png -vf " + "\"movie=gistic_logo.png [watermark]; "
                + "movie=scale.png [scale]; " + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; "
                + "[mid] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    }
    PrintStream video_script = new PrintStream(outFs.create(new Path(output, "make_video.sh")));
    video_script.println(video_command);
    video_script.close();
}
From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java
License:Open Source License
public static boolean multiplot(Path[] input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    String timeRange = params.get("time");
    final Date dateFrom, dateTo;
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    try {
        String[] parts = timeRange.split("\\.\\.");
        dateFrom = dateFormat.parse(parts[0]);
        dateTo = dateFormat.parse(parts[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        System.err.println("Use two periods ('..') to separate the from and to dates");
        return false; // To avoid an error that causes dateFrom to be uninitialized
    } catch (ParseException e) {
        System.err.println("Illegal date format in " + timeRange);
        return false;
    }
    // Number of frames to combine in each image
    int combine = params.getInt("combine", 1);
    // Retrieve all matching input directories based on date range
    Vector<Path> matchingPathsV = new Vector<Path>();
    for (Path inputFile : input) {
        FileSystem inFs = inputFile.getFileSystem(params);
        // List the date subdirectories of this input directory
        // (listing inputFile, not the whole input array, avoids adding
        // every directory once per input path)
        FileStatus[] matchingDirs = inFs.listStatus(inputFile, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String dirName = p.getName();
                try {
                    Date date = dateFormat.parse(dirName);
                    return date.compareTo(dateFrom) >= 0 && date.compareTo(dateTo) <= 0;
                } catch (ParseException e) {
                    LOG.warn("Cannot parse directory name: " + dirName);
                    return false;
                }
            }
        });
        for (FileStatus matchingDir : matchingDirs)
            matchingPathsV.add(new Path(matchingDir.getPath(), "*.hdf"));
    }
    if (matchingPathsV.isEmpty()) {
        LOG.warn("No matching directories for the given input");
        return false;
    }

    Path[] matchingPaths = matchingPathsV.toArray(new Path[matchingPathsV.size()]);
    Arrays.sort(matchingPaths);

    // Clear all paths to ensure we set our own paths for each job
    params.clearAllPaths();

    // Create a water mask if we need to recover holes on write
    if (params.get("recover", "none").equals("write")) {
        // Recovering holes on write requires a water mask image to be generated first
        OperationsParams wmParams = new OperationsParams(params);
        wmParams.setBoolean("background", false);
        Path wmImage = new Path(output, new Path("water_mask"));
        HDFPlot.generateWaterMask(wmImage, wmParams);
        params.set(HDFPlot.PREPROCESSED_WATERMARK, wmImage.toString());
    }

    // Start a job for each path
    int imageWidth = -1;
    int imageHeight = -1;
    boolean overwrite = params.getBoolean("overwrite", false);
    boolean pyramid = params.getBoolean("pyramid", false);
    FileSystem outFs = output.getFileSystem(params);
    Vector<Job> jobs = new Vector<Job>();
    boolean background = params.getBoolean("background", false);
    Rectangle mbr = new Rectangle(-180, -90, 180, 90);

    for (int i = 0; i < matchingPaths.length; i += combine) {
        Path[] inputPaths = new Path[Math.min(combine, matchingPaths.length - i)];
        System.arraycopy(matchingPaths, i, inputPaths, 0, inputPaths.length);
        Path outputPath = new Path(output, inputPaths[0].getParent().getName() + (pyramid ? "" : ".png"));
        if (overwrite || !outFs.exists(outputPath)) {
            // Need to plot
            Job rj = HDFPlot.plotHeatMap(inputPaths, outputPath, params);
            if (imageHeight == -1 || imageWidth == -1) {
                if (rj != null) {
                    imageHeight = rj.getConfiguration().getInt("height", 1000);
                    imageWidth = rj.getConfiguration().getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(rj.getConfiguration(), "mbr");
                } else {
                    imageHeight = params.getInt("height", 1000);
                    imageWidth = params.getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(params, "mbr");
                }
            }
            if (background && rj != null)
                jobs.add(rj);
        }
    }

    // Wait until all jobs are done
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob.getJobID());
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            throw new RuntimeException("Error running job " + firstJob.getJobID());
        }
        jobs.remove(0);
    }

    // Draw the scale in the output path if needed
    String scalerange = params.get("scalerange");
    if (scalerange != null) {
        String[] parts = scalerange.split("\\.\\.");
        double min = Double.parseDouble(parts[0]);
        double max = Double.parseDouble(parts[1]);
        String scale = params.get("scale", "none").toLowerCase();
        if (scale.equals("vertical")) {
            MultiHDFPlot.drawVerticalScale(new Path(output, "scale.png"), min, max, 64, imageHeight, params);
        } else if (scale.equals("horizontal")) {
            MultiHDFPlot.drawHorizontalScale(new Path(output, "scale.png"), min, max, imageWidth, 64, params);
        }
    }
    // Add the KML file
    createKML(outFs, output, mbr, params);
    return true;
}
From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java
License:Open Source License
private static void createKML(FileSystem outFs, Path output, Rectangle mbr, OperationsParams params)
        throws IOException, ParseException {
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");
        }
    });

    Path kmlPath = new Path(output, "index.kml");
    PrintStream ps = new PrintStream(outFs.create(kmlPath));
    ps.println("<?xml version='1.0' encoding='UTF-8'?>");
    ps.println("<kml xmlns='http://www.opengis.net/kml/2.2'>");
    ps.println("<Folder>");
    String mbrStr = String.format(
            "<LatLonBox><west>%f</west><south>%f</south><east>%f</east><north>%f</north></LatLonBox>",
            mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    for (FileStatus image : all_images) {
        SimpleDateFormat fileDateFormat = new SimpleDateFormat("yyyy.MM.dd");
        SimpleDateFormat kmlDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        // The date of each overlay is encoded in its file name, e.g. "2015.01.17.png"
        String name = image.getPath().getName();
        int dotIndex = name.lastIndexOf('.');
        name = name.substring(0, dotIndex);
        Date date = fileDateFormat.parse(name);
        String kmlDate = kmlDateFormat.format(date);
        ps.println("<GroundOverlay>");
        ps.println("<name>" + kmlDate + "</name>");
        ps.println("<TimeSpan>");
        ps.println("<begin>" + kmlDate + "</begin>");
        ps.println("<end>" + kmlDateFormat.format(date.getTime() + OneDayMillis) + "</end>");
        ps.println("</TimeSpan>");
        ps.println("<Icon><href>" + image.getPath().getName() + "</href></Icon>");
        ps.println(mbrStr);
        ps.println("</GroundOverlay>");
    }
    String scale = params.get("scale", "none").toLowerCase();
    if (scale.equals("vertical")) {
        ps.println("<ScreenOverlay>");
        ps.println("<name>Scale</name>");
        ps.println("<Icon><href>scale.png</href></Icon>");
        ps.println("<overlayXY x='1' y='0.5' xunits='fraction' yunits='fraction'/>");
        ps.println("<screenXY x='1' y='0.5' xunits='fraction' yunits='fraction'/>");
        ps.println("<rotationXY x='0' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<size x='0' y='0.7' xunits='fraction' yunits='fraction'/>");
        ps.println("</ScreenOverlay>");
    } else if (scale.equals("horizontal")) {
        ps.println("<ScreenOverlay>");
        ps.println("<name>Scale</name>");
        ps.println("<Icon><href>scale.png</href></Icon>");
        ps.println("<overlayXY x='0.5' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<screenXY x='0.5' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<rotationXY x='0' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<size x='0.7' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("</ScreenOverlay>");
    }
    ps.println("</Folder>");
    ps.println("</kml>");
    ps.close();
}
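createKML derives each overlay's date purely from the image's base name, which is what getName() provides. A minimal sketch of just that step, assuming the yyyy.MM.dd.png naming scheme used by these examples (the concrete path is illustrative):

import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.fs.Path;

public class DateFromFileName {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat fileDateFormat = new SimpleDateFormat("yyyy.MM.dd");
        // getName() yields just "2015.01.17.png", regardless of the parent directory
        String name = new Path("/output/2015.01.17.png").getName();
        name = name.substring(0, name.lastIndexOf('.')); // strip ".png"
        Date date = fileDateFormat.parse(name);
        System.out.println(date); // Jan 17, 2015
    }
}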
From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java
License:Open Source License
private static void createVideo(FileSystem outFs, Path output, boolean addLogo) throws IOException {
    // Rename all generated files to day_%03d.png so they are ready to use with ffmpeg
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");
        }
    });
    Arrays.sort(all_images, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return f1.getPath().getName().compareTo(f2.getPath().getName());
        }
    });

    int day = 1;
    for (FileStatus image : all_images) {
        String newFileName = String.format("day_%03d.png", day++);
        outFs.rename(image.getPath(), new Path(output, newFileName));
    }

    String videoCommand;
    if (addLogo) {
        // Puts frames together into a video
        videoCommand = "avconv -r 4 -i day_%3d.png -vf " + "\"movie=gistic_logo.png [watermark]; "
                + "movie=scale.png [scale]; " + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; "
                + "[mid] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    } else {
        videoCommand = "avconv -r 4 -i day_%3d.png -vf \"" + "movie=scale.png [scale]; "
                + "[in] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    }
    System.out.println("Run the following command to generate the video");
    System.out.println(videoCommand);
}
From source file:edu.umn.cs.spatialHadoop.nasa.SpatioAggregateQueries.java
License:Open Source License
/**
 * Performs a spatio-temporal aggregate query on an indexed directory
 * @param inFile
 * @param params
 * @throws ParseException
 * @throws IOException
 * @throws InterruptedException
 */
public static AggregateQuadTree.Node aggregateQuery(Path inFile, OperationsParams params)
        throws ParseException, IOException, InterruptedException {
    // 1- Find matching temporal partitions
    final FileSystem fs = inFile.getFileSystem(params);
    Vector<Path> matchingPartitions = selectTemporalPartitions(inFile, params);

    // 2- Find all matching files (AggregateQuadTrees) in matching partitions
    final Rectangle spatialRange = params.getShape("rect", new Rectangle()).getMBR();
    // Convert spatialRange from lat/lng space to Sinusoidal space
    double cosPhiRad = Math.cos(spatialRange.y1 * Math.PI / 180);
    double southWest = spatialRange.x1 * cosPhiRad;
    double southEast = spatialRange.x2 * cosPhiRad;
    cosPhiRad = Math.cos(spatialRange.y2 * Math.PI / 180);
    double northWest = spatialRange.x1 * cosPhiRad;
    double northEast = spatialRange.x2 * cosPhiRad;
    spatialRange.x1 = Math.min(northWest, southWest);
    spatialRange.x2 = Math.max(northEast, southEast);
    // Convert to the h v space used by MODIS
    spatialRange.x1 = (spatialRange.x1 + 180.0) / 10.0;
    spatialRange.x2 = (spatialRange.x2 + 180.0) / 10.0;
    spatialRange.y2 = (90.0 - spatialRange.y2) / 10.0;
    spatialRange.y1 = (90.0 - spatialRange.y1) / 10.0;
    // Vertically flip because the Sinusoidal space increases to the south
    double tmp = spatialRange.y2;
    spatialRange.y2 = spatialRange.y1;
    spatialRange.y1 = tmp;
    // Find the range of cells in the MODIS Sinusoidal grid overlapping the range
    final int h1 = (int) Math.floor(spatialRange.x1);
    final int h2 = (int) Math.ceil(spatialRange.x2);
    final int v1 = (int) Math.floor(spatialRange.y1);
    final int v2 = (int) Math.ceil(spatialRange.y2);
    PathFilter rangeFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            Matcher matcher = MODISTileID.matcher(p.getName());
            if (!matcher.matches())
                return false;
            int h = Integer.parseInt(matcher.group(1));
            int v = Integer.parseInt(matcher.group(2));
            return h >= h1 && h < h2 && v >= v1 && v < v2;
        }
    };

    final Vector<Path> allMatchingFiles = new Vector<Path>();
    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        for (FileStatus matchingFile : matchingFiles) {
            allMatchingFiles.add(matchingFile.getPath());
        }
    }

    //noinspection SizeReplaceableByIsEmpty
    if (allMatchingFiles.isEmpty())
        return null;

    final int resolution = AggregateQuadTree.getResolution(fs, allMatchingFiles.get(0));

    // 3- Query all matching files in parallel
    List<Node> threadsResults = Parallel.forEach(allMatchingFiles.size(),
            new RunnableRange<AggregateQuadTree.Node>() {
                @Override
                public Node run(int i1, int i2) {
                    Node threadResult = new AggregateQuadTree.Node();
                    for (int i_file = i1; i_file < i2; i_file++) {
                        Path matchingFile = allMatchingFiles.get(i_file);
                        try {
                            Matcher matcher = MODISTileID.matcher(matchingFile.getName());
                            matcher.matches(); // It has to match
                            int h = Integer.parseInt(matcher.group(1));
                            int v = Integer.parseInt(matcher.group(2));
                            // Clip the query region and normalize it in this tile
                            Rectangle translated = spatialRange.translate(-h, -v);
                            int x1 = (int) (Math.max(translated.x1, 0) * resolution);
                            int y1 = (int) (Math.max(translated.y1, 0) * resolution);
                            int x2 = (int) (Math.min(translated.x2, 1.0) * resolution);
                            int y2 = (int) (Math.min(translated.y2, 1.0) * resolution);
                            AggregateQuadTree.Node fileResult = AggregateQuadTree.aggregateQuery(fs,
                                    matchingFile, new java.awt.Rectangle(x1, y1, (x2 - x1), (y2 - y1)));
                            threadResult.accumulate(fileResult);
                        } catch (Exception e) {
                            throw new RuntimeException("Error reading file " + matchingFile, e);
                        }
                    }
                    return threadResult;
                }
            });
    AggregateQuadTree.Node finalResult = new AggregateQuadTree.Node();
    for (Node threadResult : threadsResults) {
        finalResult.accumulate(threadResult);
    }
    numOfTreesTouchesInLastRequest = allMatchingFiles.size();
    return finalResult;
}
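MODISTileID is defined elsewhere in the class; given the group(1)/group(2) parsing above and the "h%02dv%02d" format used in selectionQuery, it presumably captures the horizontal and vertical tile numbers from the file name. A minimal sketch of the same getName()-plus-regex dispatch, with an assumed pattern and an illustrative file name:

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;

public class TileIdDemo {
    // Assumed pattern; the real MODISTileID in SpatioAggregateQueries may differ
    static final Pattern MODIS_TILE_ID = Pattern.compile(".*h(\\d\\d)v(\\d\\d).*");

    public static void main(String[] args) {
        Path p = new Path("/daily/2015.01.17/h21v06.hdf"); // illustrative path
        Matcher matcher = MODIS_TILE_ID.matcher(p.getName());
        if (matcher.matches()) {
            int h = Integer.parseInt(matcher.group(1)); // 21
            int v = Integer.parseInt(matcher.group(2)); // 6
            System.out.println("h=" + h + " v=" + v);
        }
    }
}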
From source file:edu.umn.cs.spatialHadoop.nasa.SpatioAggregateQueries.java
License:Open Source License
/**
 * Performs a spatio-temporal selection query on an indexed directory
 * @param inFile
 * @param output
 * @param params
 * @throws ParseException
 * @throws IOException
 * @throws InterruptedException
 */
public static long selectionQuery(Path inFile, final ResultCollector<NASAPoint> output,
        OperationsParams params) throws ParseException, IOException, InterruptedException {
    // 1- Find matching temporal partitions
    final FileSystem fs = inFile.getFileSystem(params);
    Vector<Path> matchingPartitions = selectTemporalPartitions(inFile, params);

    // 2- Find the matching tile and the position in that tile
    final Point queryPoint = (Point) params.getShape("point");
    final double userQueryLon = queryPoint.x;
    final double userQueryLat = queryPoint.y;
    // Convert query point from lat/lng space to Sinusoidal space
    double cosPhiRad = Math.cos(queryPoint.y * Math.PI / 180);
    double projectedX = queryPoint.x * cosPhiRad;
    queryPoint.x = (projectedX + 180.0) / 10.0;
    queryPoint.y = (90.0 - queryPoint.y) / 10.0;
    final int h = (int) Math.floor(queryPoint.x);
    final int v = (int) Math.floor(queryPoint.y);
    final String tileID = String.format("h%02dv%02d", h, v);
    PathFilter rangeFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            // Match any file whose base name contains the tile ID, e.g. "h21v06"
            return p.getName().indexOf(tileID) >= 0;
        }
    };

    final Vector<Path> allMatchingFiles = new Vector<Path>();
    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        for (FileStatus matchingFile : matchingFiles) {
            allMatchingFiles.add(matchingFile.getPath());
        }
    }

    // All matching files are supposed to have the same resolution
    final int resolution = AggregateQuadTree.getResolution(fs, allMatchingFiles.get(0));

    final java.awt.Point queryInMatchingTile = new java.awt.Point();
    queryInMatchingTile.x = (int) Math.floor((queryPoint.x - h) * resolution);
    queryInMatchingTile.y = (int) Math.floor((queryPoint.y - v) * resolution);

    // 3- Query all matching files in parallel
    List<Long> threadsResults = Parallel.forEach(allMatchingFiles.size(), new RunnableRange<Long>() {
        @Override
        public Long run(int i1, int i2) {
            ResultCollector<AggregateQuadTree.PointValue> internalOutput = output == null ? null
                    : new ResultCollector<AggregateQuadTree.PointValue>() {
                        NASAPoint middleValue = new NASAPoint(userQueryLon, userQueryLat, 0, 0);

                        @Override
                        public void collect(AggregateQuadTree.PointValue value) {
                            middleValue.value = value.value;
                            middleValue.timestamp = value.timestamp;
                            output.collect(middleValue);
                        }
                    };
            long numOfResults = 0;
            for (int i_file = i1; i_file < i2; i_file++) {
                try {
                    Path matchingFile = allMatchingFiles.get(i_file);
                    java.awt.Rectangle query = new java.awt.Rectangle(queryInMatchingTile.x,
                            queryInMatchingTile.y, 1, 1);
                    AggregateQuadTree.selectionQuery(fs, matchingFile, query, internalOutput);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            return numOfResults;
        }
    });
    long totalResults = 0;
    for (long result : threadsResults) {
        totalResults += result;
    }
    return totalResults;
}
From source file:edu.umn.cs.spatialHadoop.nasa.SpatioTemporalAggregateQuery.java
License:Open Source License
/**
 * Performs a spatio-temporal aggregate query on an indexed directory
 * @param inFile
 * @param params
 * @throws ParseException
 * @throws IOException
 */
public static AggregateQuadTree.Node aggregateQuery(Path inFile, OperationsParams params)
        throws ParseException, IOException {
    // 1- Run a temporal filter step to find all matching temporal partitions
    Vector<Path> matchingPartitions = new Vector<Path>();
    // List of time ranges to check. Initially it contains one range as
    // specified by the user. Eventually, it can be split into at most two
    // partitions if partially matched by a partition.
    Vector<TimeRange> temporalRanges = new Vector<TimeRange>();
    temporalRanges.add(new TimeRange(params.get("time")));
    Path[] temporalIndexes = new Path[] { new Path(inFile, "yearly"), new Path(inFile, "monthly"),
            new Path(inFile, "daily") };
    int index = 0;
    final FileSystem fs = inFile.getFileSystem(params);
    while (index < temporalIndexes.length && !temporalRanges.isEmpty()) {
        Path indexDir = temporalIndexes[index];
        LOG.info("Checking index dir " + indexDir);
        TemporalIndex temporalIndex = new TemporalIndex(fs, indexDir);
        for (int iRange = 0; iRange < temporalRanges.size(); iRange++) {
            TimeRange range = temporalRanges.get(iRange);
            TemporalPartition[] matches = temporalIndex.selectContained(range.start, range.end);
            if (matches != null) {
                LOG.info("Matched " + matches.length + " partitions in " + indexDir);
                for (TemporalPartition match : matches) {
                    LOG.info("Matched temporal partition: " + match.dirName);
                    matchingPartitions.add(new Path(indexDir, match.dirName));
                }
                // Update the range to remove the matched part
                TemporalPartition firstMatch = matches[0];
                TemporalPartition lastMatch = matches[matches.length - 1];
                if (range.start < firstMatch.start && range.end > lastMatch.end) {
                    // Need to split the range into two
                    temporalRanges.setElementAt(new TimeRange(range.start, firstMatch.start), iRange);
                    temporalRanges.insertElementAt(new TimeRange(lastMatch.end, range.end), iRange);
                } else if (range.start < firstMatch.start) {
                    // Update range in-place
                    range.end = firstMatch.start;
                } else if (range.end > lastMatch.end) {
                    // Update range in-place
                    range.start = lastMatch.end;
                } else {
                    // Current range was completely covered. Remove it
                    temporalRanges.remove(iRange);
                }
            }
        }
        index++;
    }

    numOfTemporalPartitionsInLastQuery = matchingPartitions.size();

    // 2- Find all matching files (AggregateQuadTrees) in matching partitions
    final Rectangle spatialRange = params.getShape("rect", new Rectangle()).getMBR();
    // Convert spatialRange from lat/lng space to Sinusoidal space
    double cosPhiRad = Math.cos(spatialRange.y1 * Math.PI / 180);
    double southWest = spatialRange.x1 * cosPhiRad;
    double southEast = spatialRange.x2 * cosPhiRad;
    cosPhiRad = Math.cos(spatialRange.y2 * Math.PI / 180);
    double northWest = spatialRange.x1 * cosPhiRad;
    double northEast = spatialRange.x2 * cosPhiRad;
    spatialRange.x1 = Math.min(northWest, southWest);
    spatialRange.x2 = Math.max(northEast, southEast);
    // Convert to the h v space used by MODIS
    spatialRange.x1 = (spatialRange.x1 + 180.0) / 10.0;
    spatialRange.x2 = (spatialRange.x2 + 180.0) / 10.0;
    spatialRange.y2 = (90.0 - spatialRange.y2) / 10.0;
    spatialRange.y1 = (90.0 - spatialRange.y1) / 10.0;
    // Vertically flip because the Sinusoidal space increases to the south
    double tmp = spatialRange.y2;
    spatialRange.y2 = spatialRange.y1;
    spatialRange.y1 = tmp;
    // Find the range of cells in the MODIS Sinusoidal grid overlapping the range
    final int h1 = (int) Math.floor(spatialRange.x1);
    final int h2 = (int) Math.ceil(spatialRange.x2);
    final int v1 = (int) Math.floor(spatialRange.y1);
    final int v2 = (int) Math.ceil(spatialRange.y2);
    PathFilter rangeFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            Matcher matcher = MODISTileID.matcher(p.getName());
            if (!matcher.matches())
                return false;
            int h = Integer.parseInt(matcher.group(1));
            int v = Integer.parseInt(matcher.group(2));
            return h >= h1 && h < h2 && v >= v1 && v < v2;
        }
    };

    final Vector<Path> allMatchingFiles = new Vector<Path>();
    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        for (FileStatus matchingFile : matchingFiles) {
            allMatchingFiles.add(matchingFile.getPath());
        }
    }

    // 3- Query all matching files in parallel
    Vector<Node> threadsResults = Parallel.forEach(allMatchingFiles.size(),
            new RunnableRange<AggregateQuadTree.Node>() {
                @Override
                public Node run(int i1, int i2) {
                    Node threadResult = new AggregateQuadTree.Node();
                    for (int i_file = i1; i_file < i2; i_file++) {
                        try {
                            Path matchingFile = allMatchingFiles.get(i_file);
                            Matcher matcher = MODISTileID.matcher(matchingFile.getName());
                            matcher.matches(); // It has to match
                            int h = Integer.parseInt(matcher.group(1));
                            int v = Integer.parseInt(matcher.group(2));
                            // Clip the query region and normalize it in this tile
                            Rectangle translated = spatialRange.translate(-h, -v);
                            int x1 = (int) (Math.max(translated.x1, 0) * 1200);
                            int y1 = (int) (Math.max(translated.y1, 0) * 1200);
                            int x2 = (int) (Math.min(translated.x2, 1.0) * 1200);
                            int y2 = (int) (Math.min(translated.y2, 1.0) * 1200);
                            AggregateQuadTree.Node fileResult = AggregateQuadTree.aggregateQuery(fs,
                                    matchingFile, new java.awt.Rectangle(x1, y1, (x2 - x1), (y2 - y1)));
                            threadResult.accumulate(fileResult);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                    return threadResult;
                }
            });
    AggregateQuadTree.Node finalResult = new AggregateQuadTree.Node();
    for (Node threadResult : threadsResults)
        finalResult.accumulate(threadResult);
    numOfTreesTouchesInLastRequest = allMatchingFiles.size();
    return finalResult;
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPairHadoop.java
License:Open Source License
/**
 * Computes the closest pair of points in the given file by issuing two
 * MapReduce jobs; the output of the first round becomes the input of the second.
 * @param file
 * @param params
 * @throws IOException
 */
public static <S extends Shape> void cloesetPair(Path file, OperationsParams params) throws IOException {
    // Try to get file MBR from the MBRs of blocks
    JobConf job = new JobConf(params, ClosestPairHadoop.class);
    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        // Generate a collision-free output path based on the input's base name
        outputPath = new Path(file.getName() + ".closest_pair_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));
    outFs.delete(outputPath, true);

    job.setJobName("ClosestPair");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Point.class);

    job.setMapperClass(Map0.class);
    job.setReducerClass(Reduce0.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeArrayInputFormat.class);
    // job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.setInputPaths(job, file);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    //////////////////////////////////////////////////////////////////////////

    System.out.println("Begin second round!");
    // 2nd Round
    job = new JobConf(params, ClosestPairHadoop.class);
    job.setJobName("Second Round");
    job.setOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Point.class);

    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeArrayInputFormat.class);
    // job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.setInputPaths(job, outputPath); // The previous output is the current input

    Path newPath = new Path(outputPath.getName() + "_result");
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, newPath);

    JobClient.runJob(job);
}
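One detail worth noting: because file.getName() drops the parent directories, the generated outputPath is relative and resolves against the FileSystem's working directory rather than the input's directory. A minimal sketch of the same collision-free naming loop in isolation (the ".tmp_" suffix is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TempOutputDemo {
    public static Path uniqueOutput(FileSystem fs, Path input) throws java.io.IOException {
        Path outputPath;
        do {
            // e.g. "points.tsv.tmp_483920", relative to the working directory
            outputPath = new Path(input.getName() + ".tmp_" + (int) (Math.random() * 1000000));
        } while (fs.exists(outputPath));
        return outputPath;
    }

    public static void main(String[] args) throws java.io.IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        System.out.println(uniqueOutput(fs, new Path("/data/points.tsv")));
    }
}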
From source file:edu.umn.cs.spatialHadoop.operations.Head.java
License:Open Source License
/**
 * Reads a maximum of n lines from the given file.
 * @param fs
 * @param p
 * @param n
 * @return
 * @throws IOException
 */
public static String[] head(FileSystem fs, Path p, int n) throws IOException {
    String[] lines = new String[n];
    FileStatus fstatus = fs.getFileStatus(p);
    TaskAttemptContext context = createDummyContext();
    LineRecordReader lineReader = new LineRecordReader();
    FileSplit split;
    if (p.getName().endsWith(".rtree")) {
        // R-tree indexed file; skip over the R-tree header before reading lines
        FSDataInputStream in = fs.open(p);
        in.skip(8); // Skip R-tree signature
        int treeHeaderSize = RTree.skipHeader(in);
        in.close();
        split = new FileSplit(p, treeHeaderSize + 8, fstatus.getLen() - treeHeaderSize - 8, new String[0]);
    } else {
        split = new FileSplit(p, 0, fstatus.getLen(), new String[0]);
    }
    lineReader.initialize(split, context);
    int numOfLines = 0;
    for (numOfLines = 0; numOfLines < lines.length && lineReader.nextKeyValue(); numOfLines++) {
        lines[numOfLines] = lineReader.getCurrentValue().toString();
    }
    lineReader.close();
    return lines;
}
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
/**
 * A MapReduce version of KNN query.
 * @param inputPath
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath,
        OperationsParams params) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range, which is a circle centered at the
                // query point with the distance to the k^{th} neighbor as its radius

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If the output file was not set by the user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}