List of usage examples for org.apache.hadoop.fs.Path getName

public String getName()

Returns the final component of this path, i.e. the file or directory name after the last path separator.
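A minimal sketch of what getName() returns, using made-up paths for illustration:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() keeps only the final path component; parent directories and the scheme are dropped
        System.out.println(new Path("/user/data/input.csv").getName());                 // input.csv
        System.out.println(new Path("hdfs://namenode:8020/logs/part-00000").getName()); // part-00000
        System.out.println(new Path("/").getName());                                    // "" (the root has an empty name)
    }
}

The examples below mostly use this return value for name-based filtering: a suffix check with endsWith, a prefix check with startsWith, or a substring check with contains.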
From source file:edu.umn.cs.spatialHadoop.osm.OSMToKML.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args), false);
    if (!params.checkInputOutput()) {
        System.err.println("Please specify input and output");
        System.exit(1);
    }
    params.setClass("shape", OSMPolygon.class, Shape.class);
    Path inputPath = params.getInputPath();
    FileSystem inFs = inputPath.getFileSystem(params);
    ShapeArrayRecordReader in = new ShapeArrayRecordReader(params,
        new FileSplit(inputPath, 0, inFs.getFileStatus(inputPath).getLen(), new String[0]));
    Path outPath = params.getOutputPath();
    FileSystem outFs = outPath.getFileSystem(params);
    PrintWriter out;
    ZipOutputStream zipOut = null;
    if (outPath.getName().toLowerCase().endsWith(".kmz")) {
        // Create a KMZ file
        FSDataOutputStream kmzOut = outFs.create(outPath);
        zipOut = new ZipOutputStream(kmzOut);
        zipOut.putNextEntry(new ZipEntry("osm.kml"));
        out = new PrintWriter(zipOut);
    } else {
        out = new PrintWriter(outFs.create(outPath));
    }
    out.println("<?xml version='1.0' encoding='UTF-8'?>");
    out.println("<kml xmlns='http://www.opengis.net/kml/2.2'>");
    out.println("<Document>");
    writeAllStyles(out);
    Rectangle key = in.createKey();
    ArrayWritable values = in.createValue();
    while (in.next(key, values)) {
        System.out.println("Read " + values.get().length);
        for (Shape shape : (Shape[]) values.get()) {
            if (shape instanceof OSMPolygon) {
                out.println(OSMtoKMLElement((OSMPolygon) shape));
            }
        }
        out.println();
    }
    out.println("</Document>");
    out.println("</kml>");
    in.close();
    if (zipOut != null) {
        // KMZ file
        out.flush();
        zipOut.closeEntry();
        zipOut.close();
    } else {
        // KML file
        out.close();
    }
}
From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java
License:Open Source License
private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    job.setJobName("Generator");
    Shape shape = params.getShape("shape");
    FileSystem outFs = outFile.getFileSystem(job);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();

    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        FileSystem fs = outFile.getFileSystem(job);
        long blocksize = fs.getDefaultBlockSize(outFile);
        long size = params.getSize("size");
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, outFile, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, outFile);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // TODO move the following part to OutputCommitter
    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(outFile, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
        .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(outFile, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();
}
From source file:edu.umn.cs.spatialHadoop.util.FileUtil.java
License:Open Source License
/**
 * Returns the extension of the file after removing any possible suffixes
 * for compression.
 * @param path
 * @return
 */
public static String getExtensionWithoutCompression(Path path) {
    String extension = "";
    String fname = path.getName().toLowerCase();
    if (compressionCodecs.getCodec(path) == null) {
        // File not compressed, get the extension
        int last_dot = fname.lastIndexOf('.');
        if (last_dot >= 0) {
            extension = fname.substring(last_dot + 1);
        }
    } else {
        // File is compressed, get the extension before the compression suffix
        int last_dot = fname.lastIndexOf('.');
        if (last_dot > 0) {
            int prev_dot = fname.lastIndexOf('.', last_dot - 1);
            if (prev_dot >= 0) {
                extension = fname.substring(prev_dot + 1, last_dot);
            }
        }
    }
    return extension;
}
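A short illustration of the method above; this assumes compressionCodecs is a CompressionCodecFactory configured with the default Hadoop codecs, so that a .gz suffix is recognized as compression:

// Hypothetical inputs and their expected results:
// "points.csv"    -> "csv"  (not compressed: everything after the last dot)
// "points.csv.gz" -> "csv"  (compressed: the extension before the ".gz" suffix)
// "points"        -> ""     (no dot, so no extension)
String ext = FileUtil.getExtensionWithoutCompression(new Path("points.csv.gz"));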
From source file:edu.umn.cs.spatialHadoop.util.NASADatasetUtil.java
License:Open Source License
public static Path[] getMatchingFilesInPath(Path path, final String inputDateString) throws IOException {
    FileSystem fileSystem = path.getFileSystem(new Configuration());
    FileStatus[] matchingDirs = fileSystem.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            return p.getName().contains(inputDateString);
        }
    });
    Path[] paths = new Path[matchingDirs.length];
    for (int i = 0; i < paths.length; i++) {
        paths[i] = matchingDirs[i].getPath();
    }
    return paths;
}
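A hypothetical call against a dataset directory laid out by date (the paths and the date string are made up for illustration):

// Returns every child of /data/mntgIndex whose name contains "2017-08-03"
Path[] dirs = NASADatasetUtil.getMatchingFilesInPath(new Path("/data/mntgIndex"), "2017-08-03");
for (Path dir : dirs)
    System.out.println(dir.getName());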
From source file:edu.umn.cs.spatialHadoop.util.TemporalIndexManager.java
License:Open Source License
/**
 * Based on a certain time range, this method filters all directories and
 * determines which files need to be indexed on the daily, monthly and yearly
 * levels. After calling this method, you need to call the daily, monthly
 * and yearly getters to return the paths required to be indexed.
 * @param timeRange
 * @throws IOException
 * @throws ParseException
 */
public void prepareNeededIndexes(String timeRange) throws IOException, ParseException {
    if (timeRange == null) {
        LOG.error("TimeRange is empty");
        return;
    }

    // Parse start and end dates
    final Date startDate, endDate;
    try {
        startDate = dayFormat.parse(timeRange.split("\\.\\.")[0]);
        endDate = dayFormat.parse(timeRange.split("\\.\\.")[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        LOG.error("Use two periods '..' as the separator between the from and to dates");
        return;
    } catch (ParseException e) {
        LOG.error("Illegal date format in " + timeRange);
        return;
    }

    // Filter all file/folder paths based on the start-end date range
    FileStatus[] matchingDirs = fileSystem.listStatus(datasetPath, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String dirName = p.getName();
            try {
                Date date = dayFormat.parse(dirName);
                return date.compareTo(startDate) >= 0 && date.compareTo(endDate) <= 0;
            } catch (ParseException e) {
                LOG.warn("Cannot parse directory name: " + dirName);
                return false;
            }
        }
    });
    if (matchingDirs.length == 0) {
        LOG.warn("No matching directories for the given input");
    }

    // Re-indexing check for each matching directory
    for (FileStatus matchingDir : matchingDirs) {
        String matchingDirDateString = NASADatasetUtil.extractDateStringFromFileStatus(matchingDir);
        if (existYearlyIndexes.containsKey(NASADatasetUtil.getYearFormat(matchingDirDateString))) {
            // needs to re-build the year, month and day indexes
            existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existMonthlyIndexes.containsKey(NASADatasetUtil.getMonthFormat(matchingDirDateString))) {
            // needs to re-build the month and day indexes
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existDailyIndexes.containsKey(NASADatasetUtil.getDayFormat(matchingDirDateString))) {
            // needs to re-build the day index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else {
            // needs to build a new index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
            int daysCountInMonth = getMatchesCountFromMap(existDailyIndexes,
                NASADatasetUtil.getMonthFormat(matchingDirDateString));
            if (daysCountInMonth >= getNumDaysPerMonth(NASADatasetUtil.extractMonthFromDate(matchingDirDateString))) {
                existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
                int monthsCountInYear = getMatchesCountFromMap(existMonthlyIndexes,
                    NASADatasetUtil.getYearFormat(matchingDirDateString));
                if (monthsCountInYear >= getNumMonthsPerYear()) {
                    existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
                }
            }
        }
    }

    convertNeededIndexesListIntoArrays();
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Lists the contents of a directory
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        // Sort directories before files, then alphabetically by name
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        // Guard against a missing "path" parameter (pathStr may be null here)
        if (pathStr != null && pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                    + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                    + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                    + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png")) {
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
            }
        }
        out.print("}");
        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java
License:Open Source License
private static void plotLocal(Path[] inFiles, final Path outPath, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    final boolean vflip = params.getBoolean("vflip", true);

    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get("mbr") != null ? params.getShape("mbr").getMBR()
        : FileMBR.fileMBR(inFiles, mbrParams);
    OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int tileWidth = params.getInt("tilewidth", 256);
    int tileHeight = params.getInt("tileheight", 256);
    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }

    String outFName = outPath.getName();
    int extensionStart = outFName.lastIndexOf('.');
    final String extension = extensionStart == -1 ? ".png" : outFName.substring(extensionStart);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    try {
        Plotter plotter = plotterClass.newInstance();
        plotter.configure(params);

        String[] strLevels = params.get("levels", "7").split("\\.\\.");
        int minLevel, maxLevel;
        if (strLevels.length == 1) {
            minLevel = 0;
            maxLevel = Integer.parseInt(strLevels[0]);
        } else {
            minLevel = Integer.parseInt(strLevels[0]);
            maxLevel = Integer.parseInt(strLevels[1]);
        }

        GridInfo bottomGrid = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        bottomGrid.rows = bottomGrid.columns = 1 << maxLevel;

        TileIndex key = new TileIndex();

        // All canvases in the pyramid, one per tile
        Map<TileIndex, Canvas> canvases = new HashMap<TileIndex, Canvas>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Rectangle partition = reader.getCurrentKey();
                if (!partition.isValid())
                    partition.set(inputMBR);

                Iterable<Shape> shapes = reader.getCurrentValue();

                for (Shape shape : shapes) {
                    Rectangle shapeMBR = shape.getMBR();
                    if (shapeMBR == null)
                        continue;
                    java.awt.Rectangle overlappingCells = bottomGrid.getOverlappingCells(shapeMBR);
                    // Iterate over levels from bottom up
                    for (key.level = maxLevel; key.level >= minLevel; key.level--) {
                        for (key.x = overlappingCells.x; key.x < overlappingCells.x + overlappingCells.width; key.x++) {
                            for (key.y = overlappingCells.y; key.y < overlappingCells.y + overlappingCells.height; key.y++) {
                                Canvas canvas = canvases.get(key);
                                if (canvas == null) {
                                    Rectangle tileMBR = new Rectangle();
                                    int gridSize = 1 << key.level;
                                    tileMBR.x1 = (inputMBR.x1 * (gridSize - key.x) + inputMBR.x2 * key.x) / gridSize;
                                    tileMBR.x2 = (inputMBR.x1 * (gridSize - (key.x + 1)) + inputMBR.x2 * (key.x + 1)) / gridSize;
                                    tileMBR.y1 = (inputMBR.y1 * (gridSize - key.y) + inputMBR.y2 * key.y) / gridSize;
                                    tileMBR.y2 = (inputMBR.y1 * (gridSize - (key.y + 1)) + inputMBR.y2 * (key.y + 1)) / gridSize;
                                    canvas = plotter.createCanvas(tileWidth, tileHeight, tileMBR);
                                    canvases.put(key.clone(), canvas);
                                }
                                plotter.plot(canvas, shape);
                            }
                        }
                        // Update overlappingCells for the higher level
                        int updatedX1 = overlappingCells.x / 2;
                        int updatedY1 = overlappingCells.y / 2;
                        int updatedX2 = (overlappingCells.x + overlappingCells.width - 1) / 2;
                        int updatedY2 = (overlappingCells.y + overlappingCells.height - 1) / 2;
                        overlappingCells.x = updatedX1;
                        overlappingCells.y = updatedY1;
                        overlappingCells.width = updatedX2 - updatedX1 + 1;
                        overlappingCells.height = updatedY2 - updatedY1 + 1;
                    }
                }
            }
            reader.close();
        }

        // Done with all splits. Write output to disk
        LOG.info("Done with plotting. Now writing the output");
        final FileSystem outFS = outPath.getFileSystem(params);

        LOG.info("Writing default empty image");
        // Write a default empty image to be displayed for non-generated tiles
        BufferedImage emptyImg = new BufferedImage(tileWidth, tileHeight, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = new SimpleGraphics(emptyImg);
        g.setBackground(new Color(0, 0, 0, 0));
        g.clearRect(0, 0, tileWidth, tileHeight);
        g.dispose();

        OutputStream out = outFS.create(new Path(outPath, "default.png"));
        ImageIO.write(emptyImg, "png", out);
        out.close();

        // Write an HTML file to browse the multilevel image using Google Maps
        LOG.info("Writing the HTML viewer file");
        LineReader templateFileReader = new LineReader(MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(tileWidth));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(tileHeight));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}",
                "'tile-' + zoom + '-' + coord.x + '-' + coord.y + '" + extension + "'");
            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();

        // Write the tiles
        final Entry<TileIndex, Canvas>[] entries = canvases.entrySet().toArray(new Map.Entry[canvases.size()]);
        // Clear the hash map to save memory as it is no longer needed
        canvases.clear();
        int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
        Parallel.forEach(entries.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i1, int i2) {
                boolean output = params.getBoolean("output", true);
                try {
                    Plotter plotter = plotterClass.newInstance();
                    plotter.configure(params);
                    for (int i = i1; i < i2; i++) {
                        Map.Entry<TileIndex, Canvas> entry = entries[i];
                        TileIndex key = entry.getKey();
                        if (vflip)
                            key.y = ((1 << key.level) - 1) - key.y;
                        Path imagePath = new Path(outPath, key.getImageFileName() + extension);
                        // Write this tile to an image
                        DataOutputStream outFile = output ? outFS.create(imagePath)
                            : new DataOutputStream(new NullOutputStream());
                        plotter.writeImage(entry.getValue(), outFile, vflip);
                        outFile.close();
                        // Remove the entry to allow GC to collect it
                        entries[i] = null;
                    }
                    return null;
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, parallelism);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }
}
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
/**
 * A MapReduce version of KNN query.
 * @param inputPath
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "PKNN");
    job.setJarByClass(HSPKNNQ.class);
    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                    new ResultCollector<Partition>() {
                        @Override
                        public void collect(Partition p) {
                            double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                            if (distance > maximum_distance.get())
                                maximum_distance.set(distance);
                        }
                    });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point with a radius equal to the distance to the k-th neighbor

                // Get the distance to the k-th neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (k-th neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                            new ResultCollector<TextWithDistance>() {
                                @Override
                                public void collect(TextWithDistance r) {
                                    distance_to_kth_neighbor.set(r.distance);
                                }
                            });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If the output path was not set by the user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
public static void main(String[] args) throws IOException {
    // Example invocation:
    // ./hadoop jar /export/scratch/louai/idea-stHadoop/st-hadoop-uber.jar pknn
    //     /mntgIndex/yyyy-MM-dd/2017-08-03 /pknn k:2 point:-78.9659,35.7998
    //     shape:edu.umn.cs.sthadoop.mntg.STPointMntg -overwrite

    // args = new String[8];
    // args[0] = "/export/scratch/mntgData/mntgIndex";
    // args[1] = "/export/scratch/mntgData/pknn";
    // args[2] = "-overwrite";
    // args[3] = "k:10";
    // args[4] = "point:-78.9659063204100,35.7903907684998";
    // args[5] = "shape:edu.umn.cs.sthadoop.trajectory.STPointTrajectory";
    // args[6] = "interval:2017-08-03,2017-08-04";
    // args[7] = "-overwrite";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length > 1 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not an instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Paths to the spatio-temporal index slices.
    List<Path> STPaths = new ArrayList<Path>();
    try {
        STPaths = STRangeQuery.getIndexedSlices(params);
    } catch (Exception e1) {
        e1.printStackTrace();
    }

    for (Path input : STPaths) {
        final Path inputFile = input;
        int count = params.getInt("count", 1);
        double closeness = params.getFloat("closeness", -1.0f);
        final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count];
        final FileSystem fs = inputFile.getFileSystem(params);
        final int k = params.getInt("k", 1);
        int concurrency = params.getInt("concurrency", 100);
        if (k == 0) {
            LOG.warn("k = 0");
        }

        if (queryPoints.length == 0) {
            printUsage();
            throw new RuntimeException("Illegal arguments");
        }
        final Path outputPath = paths.length > 1 ? new Path(paths[1].toUri() + "-" + input.getName()) : null;

        if (closeness >= 0) {
            // Get query points according to their closeness to grid intersections
            GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile);
            long seed = params.getLong("seed", System.currentTimeMillis());
            Random random = new Random(seed);
            for (int i = 0; i < count; i++) {
                int i_block = random.nextInt(gindex.size());
                int direction = random.nextInt(4);
                // Generate a point in the given direction
                // Get center point (x, y)
                Iterator<Partition> iterator = gindex.iterator();
                while (i_block-- >= 0)
                    iterator.next();
                Partition partition = iterator.next();
                double cx = (partition.x1 + partition.x2) / 2;
                double cy = (partition.y1 + partition.y2) / 2;
                double cw = partition.x2 - partition.x1;
                double ch = partition.y2 - partition.y1;
                int signx = ((direction & 1) == 0) ? 1 : -1;
                // (direction & 2) is either 0 or 2, so compare with 0 to pick the sign
                // (the original compared with 1, which could never match)
                int signy = ((direction & 2) == 0) ? 1 : -1;
                double x = cx + cw * closeness / 2 * signx;
                double y = cy + ch * closeness / 2 * signy;
                queryPoints[i] = new Point(x, y);
            }
        }

        final BooleanWritable exceptionHappened = new BooleanWritable();

        Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() {
            public void uncaughtException(Thread th, Throwable ex) {
                ex.printStackTrace();
                exceptionHappened.set(true);
            }
        };

        // Run each query in a separate thread
        final Vector<Thread> threads = new Vector<Thread>();
        for (int i = 0; i < queryPoints.length; i++) {
            Thread thread = new Thread() {
                @Override
                public void run() {
                    try {
                        Point query_point = queryPoints[threads.indexOf(this)];
                        OperationsParams newParams = new OperationsParams(params);
                        OperationsParams.setShape(newParams, "point", query_point);
                        // Pass the copy that carries this thread's query point
                        // (the original passed params, leaving newParams unused)
                        Job job = knn(inputFile, outputPath, newParams);
                    } catch (IOException e) {
                        e.printStackTrace();
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    } catch (ClassNotFoundException e) {
                        e.printStackTrace();
                    }
                }
            };
            thread.setUncaughtExceptionHandler(h);
            threads.add(thread);
        }

        long t1 = System.currentTimeMillis();
        do {
            // Ensure that there are at most MaxConcurrentThreads running
            int i = 0;
            while (i < concurrency && i < threads.size()) {
                Thread.State state = threads.elementAt(i).getState();
                if (state == Thread.State.TERMINATED) {
                    // Thread already terminated, remove from the queue
                    threads.remove(i);
                } else if (state == Thread.State.NEW) {
                    // Start the thread and move to the next one
                    threads.elementAt(i++).start();
                } else {
                    // Thread is still running, skip over it
                    i++;
                }
            }
            if (!threads.isEmpty()) {
                try {
                    // Sleep for 10 seconds or until the first thread terminates
                    threads.firstElement().join(10000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } while (!threads.isEmpty());
        long t2 = System.currentTimeMillis();
        if (exceptionHappened.get())
            throw new RuntimeException("Not all jobs finished correctly");
        System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis");
        System.out.println("Total iterations: " + TotalIterations);
    }
}
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/**
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    // args = new String[10];
    // args[0] = "/home/louai/nyc-taxi/yellowIndex";
    // args[1] = "/home/louai/nyc-taxi/humanIndex";
    // args[2] = "/home/louai/nyc-taxi/resultSTJoin";
    // args[3] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    // args[4] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    // args[5] = "interval:2015-01-01,2015-01-02";
    // args[6] = "timeDistance:1,day";
    // args[7] = "spaceDistance:2";
    // args[8] = "-overwrite";
    // args[9] = "-no-local";

    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    Path[] allFiles = params.getPaths();
    if (allFiles.length < 2) {
        System.err.println("This operation requires at least two input files");
        printUsage();
        System.exit(1);
    }
    if (allFiles.length == 2 && !params.checkInput()) {
        // One of the input files does not exist
        printUsage();
        System.exit(1);
    }
    if (allFiles.length > 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("timedistance") == null) {
        System.err.println("time distance is missing");
        printUsage();
        System.exit(1);
    }
    if (params.get("spacedistance") == null) {
        System.err.println("space distance is missing");
        printUsage();
        System.exit(1);
    }

    Path[] inputPaths = allFiles.length == 2 ? allFiles : params.getInputPaths();
    Path outputPath = allFiles.length == 2 ? null : params.getOutputPath();

    // Modify the query range with a new time interval to consider in the join
    String[] value = params.get("timedistance").split(",");
    String[] date = params.get("interval").split(",");
    int interval = Integer.parseInt(value[0]);
    String start = addtimeSpaceToInterval(date[0], -interval);
    String end = addtimeSpaceToInterval(date[1], interval);
    params.set("interval", start + "," + end);

    // Query from the dataset.
    for (Path input : inputPaths) {
        args = new String[7];
        args[0] = input.toString();
        args[1] = outputPath.getParent().toString() + "candidatebuckets/" + input.getName();
        args[2] = "shape:" + params.get("shape");
        args[3] = "rect:" + params.get("rect");
        args[4] = "interval:" + params.get("interval");
        args[5] = "-overwrite";
        args[6] = "-no-local";
        for (String x : args)
            System.out.println(x);
        STRangeQuery.main(args);
        System.out.println("done with the STQuery from: " + input.toString() + "\n" + "candidate:" + args[1]);
    }

    // Invoke the map-hash and reduce-join steps
    FileSystem fs = outputPath.getFileSystem(params);
    Path inputstjoin;
    if (fs.exists(new Path(outputPath.getParent().toString() + "candidatebuckets/"))) {
        inputstjoin = new Path(outputPath.getParent().toString() + "candidatebuckets");
    } else {
        inputstjoin = new Path(outputPath.getParent().toString() + "/candidatebuckets");
    }
    Path hashedbucket = new Path(outputPath.getParent().toString() + "hashedbucket");

    long t1 = System.currentTimeMillis();
    // Join hash step
    args = new String[7];
    args[0] = inputstjoin.toString();
    args[1] = hashedbucket.toString();
    args[2] = "shape:" + params.get("shape");
    args[3] = "rect:" + params.get("rect");
    args[4] = "interval:" + params.get("interval");
    args[5] = "-overwrite";
    args[6] = "-no-local";
    for (String x : args)
        System.out.println(x);
    STHash.main(args);

    // // Join step
    // if (fs.exists(new Path(outputPath.getParent().toString() + "hashedbucket"))) {
    //     inputstjoin = new Path(outputPath.getParent().toString() + "hashedbucket");
    // } else {
    //     inputstjoin = new Path(outputPath.getParent().toString() + "/hashedbucket");
    // }

    // Join refinement step
    stJoin(hashedbucket, outputPath, params);
    long t2 = System.currentTimeMillis();
    System.out.println("Total join time: " + (t2 - t1) + " millis");
}