Example usage for org.apache.hadoop.fs.PathFilter (anonymous PathFilter instances)

List of usage examples for org.apache.hadoop.fs.PathFilter


On this page you can find usage examples for org.apache.hadoop.fs.PathFilter, each created as an anonymous class via new PathFilter() { ... }.



Source Link


From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

/**
 * Recover fill values in the array {@link Values}.
 * @param conf configuration used to look up the water mask path and to resolve file systems
 * @throws IOException
 * @throws Exception
 */
private void recoverFillValues(Configuration conf) throws IOException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    // For now, we can only recover values of type short
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        // The directory comes from WATER_MASK_PATH in conf, falling back to the URL below.
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        // Tile identifier of the form hXXvYY built from the dataset's h and v fields.
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        // Keep only the entries whose file name contains the tile identifier.
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
        // NOTE(review): wmFile[0] is read next; as shown, nothing leaves the method when the
        // listing is empty -- confirm against the full source.
        Path wmFileToLoad = wmFile[0].getPath();
        // For an HTTP file system the file is first copied via FileUtil.copyFile and then
        // opened through the local file system.
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
        // NOTE(review): waterMaskGroup is dereferenced below; as shown, the null case only
        // logs a warning -- confirm whether the full source returns here.
        byte[] waterMask = null;
        // Take the byte array of a DDNumericDataGroup found in the group's contents.
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsByteArray();
        // Convert the waterMask to a BitArray of the right size
        // NOTE(review): waterMask is still null here if no DDNumericDataGroup was found -- confirm.
        int size = 4800 / nasaDataset.resolution;
        BitArray waterMaskBits = convertWaterMaskToBits(ByteBuffer.wrap(waterMask), size);

        // The fill value is decoded from fillValueBytes and narrowed to a short.
        short fillValueShort = (short) HDFConstants.readAsInteger(fillValueBytes, 0, fillValueBytes.length);
        recoverXYShorts(ByteBuffer.wrap(unparsedDataArray), fillValueShort, waterMaskBits);
    } finally {
        // The guarded statement is missing from this excerpt; presumably it closes waterMaskFile.
        if (waterMaskFile != null)

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader3.java

License:Open Source License

/**
 * Recover fill values in the array {@link Values}.
 * @param conf configuration used to look up the water mask path and to resolve file systems
 * @throws IOException
 * @throws Exception
 */
private void recoverFillValues(Configuration conf) throws IOException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        // The directory comes from WATER_MASK_PATH in conf, falling back to the URL below.
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        // Tile identifier of the form hXXvYY built from the dataset's h and v fields.
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        // Keep only the entries whose file name contains the tile identifier.
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
        // NOTE(review): wmFile[0] is read next; as shown, nothing leaves the method when the
        // listing is empty -- confirm against the full source.
        Path wmFileToLoad = wmFile[0].getPath();
        // For an HTTP file system the file is first copied via FileUtil.copyFile and then
        // opened through the local file system.
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
        // NOTE(review): waterMaskGroup is dereferenced below; as shown, the null case only
        // logs a warning -- confirm whether the full source returns here.
        byte[] waterMask = null;
        // Take the array of a DDNumericDataGroup found in the group's contents, cast to byte[].
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsAnArray();

        // Stores which values have been recovered by copying a single value
        // without interpolation in the x-direction
        byte[] valueStatus = new byte[dataArray.length];

        // Recovery runs along x first and then along y, sharing the same status array.
        recoverXDirection(waterMask, valueStatus);
        recoverYDirection(waterMask, valueStatus);
    } finally {
        // The guarded statement is missing from this excerpt; presumably it closes waterMaskFile.
        if (waterMaskFile != null)

From source file:edu.umn.cs.spatialHadoop.nasa.ImageCompare.java

License:Open Source License

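/**
 * Compares two directories for similar images with matching names.
 * @param dir1 first directory; its ".png" files are listed and indexed by file name
 * @param dir2 second directory; its ".png" files are listed and indexed by file name
 * @throws IOException
 */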
public static void compareFolders(Path dir1, Path dir2) throws IOException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    // Accepts paths whose lower-cased file name ends with ".png".
    final PathFilter png_filter = new PathFilter() {
        public boolean accept(Path path) {
            return path.getName().toLowerCase().endsWith(".png");
    // Retrieve all images in dir1
    FileStatus[] images1 = dir1.getFileSystem(new Configuration()).listStatus(dir1, png_filter);
    Map<String, Path> images1ByName = new HashMap<String, Path>();
    for (FileStatus fstatus : images1)
        images1ByName.put(fstatus.getPath().getName(), fstatus.getPath());

    // Retrieve all images in dir2
    FileStatus[] images2 = dir2.getFileSystem(new Configuration()).listStatus(dir2, png_filter);
    Map<String, Path> images2ByName = new HashMap<String, Path>();
    for (FileStatus fstatus : images2)
        images2ByName.put(fstatus.getPath().getName(), fstatus.getPath());

    // Parallel vectors: similarities.get(i) is the score recorded for names.get(i).
    // NOTE(review): no statement in this excerpt adds to either vector -- presumably the
    // add calls were lost in extraction; confirm against the full source.
    final Vector<Double> similarities = new Vector<Double>();
    final Vector<String> names = new Vector<String>();

    // Compare every pair of images with similar names
    // The loop is driven by the names found in dir2 and looks each one up in dir1.
    for (String imageName : images2ByName.keySet()) {
        Path image1 = images1ByName.get(imageName);
        // The statement guarded by this null check is not visible in this excerpt.
        if (image1 == null)
        Path image2 = images2ByName.get(imageName);
        double similarity = compareImages(image1, image2);

        // Pairs scoring above 0.1 are printed as "image1,image2,similarity".
        if (similarity > 0.1) {
            System.out.println(image1 + "," + image2 + "," + similarity);
    // Sort images by similarity
    // compare orders indices by ascending similarity; swap exchanges both vectors in step
    // so that names stay aligned with their scores.
    IndexedSortable sortable = new IndexedSortable() {
      public int compare(int i, int j) {
        double diff = similarities.get(i) - similarities.get(j);
        if (diff < 0)
          return -1;
        if (diff > 0)
          return 1;
        return 0;
      public void swap(int i, int j) {
        double tempSim = similarities.get(i);
        similarities.set(i, similarities.get(j));
        similarities.set(j, tempSim);
        String tempName = names.get(i);
        names.set(i, names.get(j));
        names.set(j, tempName);
    final IndexedSorter sorter = new QuickSort();
    sorter.sort(sortable, 0, names.size());
    final float threshold = 0.0f;
    // Display the entries from index names.size() * threshold onward. With threshold set
    // to 0.0f the start index is 0, so every entry is printed.
    // NOTE(review): an earlier comment said "10 percentile", which does not match 0.0f -- confirm intent.
    for (int i = (int) (names.size() * threshold); i < names.size(); i++) {
      System.out.println(similarities.get(i)+ " ... "+names.get(i));

From source file:edu.umn.cs.spatialHadoop.nasa.MakeHDFVideo.java

License:Open Source License

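/**
 * Command-line entry point: plots the input with {@code HDFPlot} and prepares the generated
 * images in the output directory for assembling into a video.
 * @param args command-line arguments, parsed into {@code OperationsParams}
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */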
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    // The body of this validation branch is not visible in this excerpt.
    if (!params.checkInputOutput()) {

    //Path input = params.getPaths()[0];
    // The second path is used as the output directory.
    Path output = params.getPaths()[1];
    boolean recoverHoles = params.is("recoverholes");
    boolean addDate = params.is("adddate");

    // Mutable copy of the arguments, later handed to HDFPlot.main.
    Vector<String> vargs = new Vector<String>(Arrays.asList(args));
    Rectangle plotRange = (Rectangle) params.getShape("rect");
    if (plotRange != null && recoverHoles) {
        // Extend the plot range to improve the quality of RecoverHoles
        // Scans the arguments for rect:/mbr:/width:/height: entries; the loop header has no
        // increment and both branch bodies are missing here -- presumably matching entries
        // are removed and the index advances otherwise; confirm against the full source.
        for (int i = 0; i < vargs.size();) {
            if (vargs.get(i).startsWith("rect:") || vargs.get(i).startsWith("mbr:")
                    || vargs.get(i).startsWith("width:") || vargs.get(i).startsWith("height:")) {
            } else {
        double w = plotRange.getWidth();
        double h = plotRange.getHeight();
        // Presumably grows the rectangle by half its width and height -- confirm buffer() semantics.
        plotRange = plotRange.buffer(w / 2, h / 2);

        // Requested (or default 1000) width and height are doubled.
        int new_width = params.getInt("width", 1000) * 2;
        int new_height = params.getInt("height", 1000) * 2;

        // Append the replacement rect/width/height arguments.
        vargs.add(plotRange.toText(new Text("rect:")).toString());
        vargs.add("width:" + new_width);
        vargs.add("height:" + new_height);

    // 1- Call HDF plot to generate all images
    HDFPlot.main(vargs.toArray(new String[vargs.size()]));

    // 2- Call RecoverHoles to recover holes (surprise)
    // The recovery call itself is not visible in this excerpt.
    if (recoverHoles) {
        if (plotRange != null) {
            // Need to crop all images to restore original selection
            // Passes the originally requested rect and the extended plotRange.
            cropImages(output, (Rectangle) params.getShape("rect"), plotRange);

    // The body of this branch is not visible in this excerpt.
    if (addDate) {

    FileSystem outFs = output.getFileSystem(params);
    // List the generated ".png" files (case-insensitive extension match) in the output directory.
    FileStatus[] generatedImages = outFs.listStatus(output, new PathFilter() {
        public boolean accept(Path path) {
            return path.getName().toLowerCase().endsWith(".png");
    if (generatedImages.length == 0) {
        Log.warn("No generated images");

    // NOTE(review): generatedImages[0] is read next; as shown, nothing leaves the method when
    // the listing is empty, and no close of inStream is visible -- confirm against the full source.
    InputStream inStream = outFs.open(generatedImages[0].getPath());
    BufferedImage firstImage = ImageIO.read(inStream);

    int imageWidth = firstImage.getWidth();
    int imageHeight = firstImage.getHeight();

    // Optional "scale-range" parameter of the form "<min>..<max>" with integer bounds.
    String scaleRangeStr = params.get("scale-range");
    if (scaleRangeStr != null) {
        String[] parts = scaleRangeStr.split("\\.\\.");
        MinMax scaleRange = new MinMax();
        scaleRange.minValue = Integer.parseInt(parts[0]);
        scaleRange.maxValue = Integer.parseInt(parts[1]);
        // Draw scale.png, 64 wide and as tall as the first generated image.
        HDFPlot2.drawScale(new Path(output, "scale.png"), scaleRange, 64, imageHeight);

    // Copy the bundled logo resource into the output directory in 4096-byte chunks.
    // NOTE(review): getResourceAsStream returns null for a missing resource, and no close of
    // either stream is visible in this excerpt -- confirm.
    InputStream logoInputStream = MakeHDFVideo.class.getResourceAsStream("/gistic_logo.png");
    OutputStream logoOutputStream = outFs.create(new Path(output, "gistic_logo.png"));
    byte[] buffer = new byte[4096];
    int size = 0;
    while ((size = logoInputStream.read(buffer)) > 0) {
        logoOutputStream.write(buffer, 0, size);

    // Rename files to be ready to use with ffmpeg
    // Selects files named <digits>.<digits>.<digits>.png.
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");

    // Order the images by file name (plain string comparison).
    Arrays.sort(all_images, new Comparator<FileStatus>() {
        public int compare(FileStatus f1, FileStatus f2) {
            return f1.getPath().getName().compareTo(f2.getPath().getName());

    // Rename in sorted order to day_001.png, day_002.png, ...
    int day = 1;
    for (FileStatus image : all_images) {
        String newFileName = String.format("day_%03d.png", day++);
        outFs.rename(image.getPath(), new Path(output, newFileName));

    // Plot the overlay image
    Path overlay = params.get("overlay") == null ? null : new Path(params.get("overlay"));
    if (overlay != null) {
        // Draw an overlay image
        OperationsParams plotParams = new OperationsParams(params);

        // Keep all arguments except input and output which change for each call
        // to Plot or PlotPyramid
        Path overlayOutput = new Path(output, "overlay.png");
        plotParams.setClass("shape", OSMPolygon.class, Shape.class);
        GeometricPlot.plot(new Path[] { overlay }, overlayOutput, plotParams);

    // Build the avconv command line; the overlay variant additionally crops and composites overlay.png.
    // NOTE(review): the files were renamed with day_%03d.png while the commands use
    // day_%3d.png -- confirm avconv resolves both to the same names.
    // NOTE(review): the overlay variant dereferences plotRange, which is null when no "rect"
    // shape was given -- confirm.
    String video_command;
    if (overlay != null) {
        video_command = "avconv -r 4 -i day_%3d.png " + "-vf \"movie=gistic_logo.png [watermark]; "
                + "movie=overlay.png [ways]; " + "movie=scale.png [scale]; " + "[in] crop="
                + plotRange.getWidth() + ":" + plotRange.getHeight() + "[in]; " + "[ways] crop="
                + plotRange.getWidth() + ":" + plotRange.getHeight() + "[ways]; "
                + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; " + "[mid][ways] overlay=0:0 [mid2]; "
                + "[mid2] pad=iw+64:ih [mid3]; " + "[mid3][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    } else {
        video_command = "avconv -r 4 -i day_%3d.png -vf " + "\"movie=gistic_logo.png [watermark]; "
                + "movie=scale.png [scale]; " + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; "
                + "[mid] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    // Opens make_video.sh in the output directory; what is written to it is not visible in this excerpt.
    PrintStream video_script = new PrintStream(outFs.create(new Path(output, "make_video.sh")));

From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java

License:Open Source License

// Plots the dated subdirectories of the inputs that fall within the "time" range of params,
// optionally draws a scale image, and writes a KML index. Returns false when the time range
// cannot be parsed or no directory matches, true otherwise.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces
// and possibly short statements). Comments describe only what is visible.
public static boolean multiplot(Path[] input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    // Expected form: "<from>..<to>", each date formatted as yyyy.MM.dd.
    // NOTE(review): timeRange is dereferenced without a null check -- confirm that "time" is mandatory.
    String timeRange = params.get("time");
    final Date dateFrom, dateTo;
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    try {
        String[] parts = timeRange.split("\\.\\.");
        dateFrom = dateFormat.parse(parts[0]);
        dateTo = dateFormat.parse(parts[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        System.err.println("Use the seperator two periods '..' to seperate from and to dates");
        return false; // To avoid an error that causes dateFrom to be uninitialized
    } catch (ParseException e) {
        System.err.println("Illegal date format in " + timeRange);
        return false;
    // Number of frames to combine in each image
    int combine = params.getInt("combine", 1);
    // Retrieve all matching input directories based on date range
    Vector<Path> matchingPathsV = new Vector<Path>();
    for (Path inputFile : input) {
        FileSystem inFs = inputFile.getFileSystem(params);
        // NOTE(review): the whole `input` array is passed to listStatus here rather than
        // `inputFile`; if that is unintended, every iteration lists all inputs and matching
        // directories are added once per input -- confirm.
        // The filter accepts entries whose name parses as yyyy.MM.dd and lies within
        // [dateFrom, dateTo] inclusive; unparseable names are logged and rejected.
        FileStatus[] matchingDirs = inFs.listStatus(input, new PathFilter() {
            public boolean accept(Path p) {
                String dirName = p.getName();
                try {
                    Date date = dateFormat.parse(dirName);
                    return date.compareTo(dateFrom) >= 0 && date.compareTo(dateTo) <= 0;
                } catch (ParseException e) {
                    LOG.warn("Cannot parse directory name: " + dirName);
                    return false;
        // Each matching directory contributes a "<dir>/*.hdf" glob path.
        for (FileStatus matchingDir : matchingDirs)
            matchingPathsV.add(new Path(matchingDir.getPath(), "*.hdf"));
    if (matchingPathsV.isEmpty()) {
        LOG.warn("No matching directories to given input");
        return false;

    Path[] matchingPaths = matchingPathsV.toArray(new Path[matchingPathsV.size()]);

    // Clear all paths to ensure we set our own paths for each job
    // (the statement this comment refers to is not visible in this excerpt)

    // Create a water mask if we need to recover holes on write
    if (params.get("recover", "none").equals("write")) {
        // Recover images on write requires a water mask image to be generated first
        OperationsParams wmParams = new OperationsParams(params);
        wmParams.setBoolean("background", false);
        Path wmImage = new Path(output, new Path("water_mask"));
        HDFPlot.generateWaterMask(wmImage, wmParams);
        // Record the generated mask location in params under HDFPlot.PREPROCESSED_WATERMARK.
        params.set(HDFPlot.PREPROCESSED_WATERMARK, wmImage.toString());
    // Start a job for each path
    // imageWidth/imageHeight stay -1 until the first plotted job provides them.
    int imageWidth = -1;
    int imageHeight = -1;
    boolean overwrite = params.getBoolean("overwrite", false);
    boolean pyramid = params.getBoolean("pyramid", false);
    FileSystem outFs = output.getFileSystem(params);
    Vector<Job> jobs = new Vector<Job>();
    boolean background = params.getBoolean("background", false);
    // Default MBR; replaced by the "mbr" shape of the first plotted job or of params.
    Rectangle mbr = new Rectangle(-180, -90, 180, 90);
    // Process the matching paths in groups of `combine`; the last group may be smaller.
    for (int i = 0; i < matchingPaths.length; i += combine) {
        Path[] inputPaths = new Path[Math.min(combine, matchingPaths.length - i)];
        System.arraycopy(matchingPaths, i, inputPaths, 0, inputPaths.length);
        // Output is named after the first group's directory, with ".png" appended unless plotting a pyramid.
        Path outputPath = new Path(output, inputPaths[0].getParent().getName() + (pyramid ? "" : ".png"));
        if (overwrite || !outFs.exists(outputPath)) {
            // Need to plot
            Job rj = HDFPlot.plotHeatMap(inputPaths, outputPath, params);
            // Capture image size and MBR once, from the job configuration when a job is
            // returned and from params otherwise.
            if (imageHeight == -1 || imageWidth == -1) {
                if (rj != null) {
                    imageHeight = rj.getConfiguration().getInt("height", 1000);
                    imageWidth = rj.getConfiguration().getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(rj.getConfiguration(), "mbr");
                } else {
                    imageHeight = params.getInt("height", 1000);
                    imageWidth = params.getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(params, "mbr");
            // The guarded statement is not visible here; presumably the job is added to `jobs`.
            if (background && rj != null)
    // Wait until all jobs are done
    // NOTE(review): the statements that wait on or remove jobs are not visible in this excerpt.
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob.getJobID());
            System.err.println("Killing all remaining jobs");
            // The loop body (presumably killing job j) is not visible in this excerpt.
            for (int j = 1; j < jobs.size(); j++)
            throw new RuntimeException("Error running job " + firstJob.getJobID());

    // Draw the scale in the output path if needed
    // "scalerange" has the form "<min>..<max>" with floating-point bounds.
    String scalerange = params.get("scalerange");
    if (scalerange != null) {
        String[] parts = scalerange.split("\\.\\.");
        double min = Double.parseDouble(parts[0]);
        double max = Double.parseDouble(parts[1]);
        String scale = params.get("scale", "none").toLowerCase();
        // Vertical scale: 64 wide by imageHeight; horizontal scale: imageWidth by 64.
        // NOTE(review): imageHeight/imageWidth are still -1 if no plot ran above -- confirm.
        if (scale.equals("vertical")) {
            MultiHDFPlot.drawVerticalScale(new Path(output, "scale.png"), min, max, 64, imageHeight, params);
        } else if (scale.equals("horizontal")) {
            MultiHDFPlot.drawHorizontalScale(new Path(output, "scale.png"), min, max, imageWidth, 64, params);
    // Add the KML file
    createKML(outFs, output, mbr, params);
    return true;

From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java

License:Open Source License

// Writes index.kml into the output directory with one dated entry per generated image,
// followed by scale-positioning elements when "scale" is vertical or horizontal.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces
// and possibly short statements). Comments describe only what is visible.
private static void createKML(FileSystem outFs, Path output, Rectangle mbr, OperationsParams params)
        throws IOException, ParseException {
    // Selects files named <digits>.<digits>.<digits>.png.
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");

    Path kmlPath = new Path(output, "index.kml");
    PrintStream ps = new PrintStream(outFs.create(kmlPath));
    ps.println("<?xml version='1.0' encoding='UTF-8'?>");
    ps.println("<kml xmlns='http://www.opengis.net/kml/2.2'>");
    // LatLonBox element built from the MBR corners; its use is not visible in this excerpt.
    String mbrStr = String.format(
            "<LatLonBox><west>%f</west><south>%f</south><east>%f</east><north>%f</north></LatLonBox>", mbr.x1,
            mbr.y1, mbr.x2, mbr.y2);
    for (FileStatus image : all_images) {
        // NOTE(review): both formatters are loop-invariant but rebuilt on every iteration.
        SimpleDateFormat fileDateFormat = new SimpleDateFormat("yyyy.MM.dd");
        SimpleDateFormat kmlDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        // Strip the extension and parse the remaining file name as a yyyy.MM.dd date.
        String name = image.getPath().getName();
        int dotIndex = name.lastIndexOf('.');
        name = name.substring(0, dotIndex);
        Date date = fileDateFormat.parse(name);
        String kmlDate = kmlDateFormat.format(date);
        ps.println("<name>" + kmlDate + "</name>");
        ps.println("<begin>" + kmlDate + "</begin>");
        // The end value is the start instant plus OneDayMillis, formatted as yyyy-MM-dd.
        ps.println("<end>" + kmlDateFormat.format(date.getTime() + OneDayMillis) + "</end>");
        ps.println("<Icon><href>" + image.getPath().getName() + "</href></Icon>");
    // Emit placement elements for the scale image according to its orientation.
    String scale = params.get("scale", "none").toLowerCase();
    if (scale.equals("vertical")) {
        ps.println("<overlayXY x='1' y='0.5' xunits='fraction' yunits='fraction'/>");
        ps.println("<screenXY x='1' y='0.5' xunits='fraction' yunits='fraction'/>");
        ps.println("<rotationXY x='0' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<size x='0' y='0.7' xunits='fraction' yunits='fraction'/>");
    } else if (scale.equals("horizontal")) {
        ps.println("<overlayXY x='0.5' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<screenXY x='0.5' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<rotationXY x='0' y='0' xunits='fraction' yunits='fraction'/>");
        ps.println("<size x='0.7' y='0' xunits='fraction' yunits='fraction'/>");

From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java

License:Open Source License

// Renames the dated images in the output directory to sequential day_NNN.png names and
// builds the avconv command that assembles them into output.mp4.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces
// and possibly short statements). Comments describe only what is visible.
private static void createVideo(FileSystem outFs, Path output, boolean addLogo) throws IOException {
    // Rename all generated files to be day_%3d.png
    // Rename files to be ready to use with ffmpeg
    // Selects files named <digits>.<digits>.<digits>.png.
    FileStatus[] all_images = outFs.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("\\d+\\.\\d+\\.\\d+\\.png");

    // Order the images by file name (plain string comparison).
    Arrays.sort(all_images, new Comparator<FileStatus>() {
        public int compare(FileStatus f1, FileStatus f2) {
            return f1.getPath().getName().compareTo(f2.getPath().getName());

    // Rename in sorted order to day_001.png, day_002.png, ...
    int day = 1;
    for (FileStatus image : all_images) {
        String newFileName = String.format("day_%03d.png", day++);
        outFs.rename(image.getPath(), new Path(output, newFileName));

    // NOTE(review): the files are renamed with day_%03d.png while the commands below use
    // day_%3d.png -- confirm avconv resolves both to the same names.
    String videoCommand;
    if (addLogo) {
        // Puts frames together into a video
        // This variant overlays gistic_logo.png as a watermark and adds scale.png on a 64-pixel pad.
        videoCommand = "avconv -r 4 -i day_%3d.png -vf " + "\"movie=gistic_logo.png [watermark]; "
                + "movie=scale.png [scale]; " + "[in][watermark] overlay=main_w-overlay_w-10:10 [mid]; "
                + "[mid] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    } else {
        // Without the logo only scale.png is added on the 64-pixel pad.
        videoCommand = "avconv -r 4 -i day_%3d.png -vf \"" + "movie=scale.png [scale]; "
                + "[in] pad=iw+64:ih [mid2]; " + "[mid2][scale] overlay=main_w-overlay_w:0 [out]\" "
                + "-r 4 -pix_fmt yuv420p output.mp4 ";
    // The statement that prints videoCommand itself is not visible in this excerpt.
    System.out.println("Run the following command to generate the video");

From source file:edu.umn.cs.spatialHadoop.nasa.SpatioAggregateQueries.java

License:Open Source License

/**
 * Performs a spatio-temporal aggregate query on an indexed directory
 * @param inFile
 * @param params
 * @return the aggregated node, or {@code null} when no files match the query
 * @throws ParseException
 * @throws IOException
 * @throws InterruptedException
 */
public static AggregateQuadTree.Node aggregateQuery(Path inFile, OperationsParams params)
        throws ParseException, IOException, InterruptedException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    // 1- Find matching temporal partitions
    final FileSystem fs = inFile.getFileSystem(params);
    Vector<Path> matchingPartitions = selectTemporalPartitions(inFile, params);

    // 2- Find all matching files (AggregateQuadTrees) in matching partitions
    // The query rectangle is the MBR of the "rect" shape (an empty Rectangle by default).
    final Rectangle spatialRange = params.getShape("rect", new Rectangle()).getMBR();
    // Convert spatialRange from lat/lng space to Sinusoidal space
    // x is scaled by cos(latitude) at the y1 edge and at the y2 edge; the smaller of the two
    // x1 values and the larger of the two x2 values are kept.
    double cosPhiRad = Math.cos(spatialRange.y1 * Math.PI / 180);
    double southWest = spatialRange.x1 * cosPhiRad;
    double southEast = spatialRange.x2 * cosPhiRad;
    cosPhiRad = Math.cos(spatialRange.y2 * Math.PI / 180);
    double northWest = spatialRange.x1 * cosPhiRad;
    double northEast = spatialRange.x2 * cosPhiRad;
    spatialRange.x1 = Math.min(northWest, southWest);
    spatialRange.x2 = Math.max(northEast, southEast);
    // Convert to the h v space used by MODIS
    // x is shifted by 180 and y is measured down from 90; both are divided by 10.
    spatialRange.x1 = (spatialRange.x1 + 180.0) / 10.0;
    spatialRange.x2 = (spatialRange.x2 + 180.0) / 10.0;
    spatialRange.y2 = (90.0 - spatialRange.y2) / 10.0;
    spatialRange.y1 = (90.0 - spatialRange.y1) / 10.0;
    // Vertically flip because the Sinusoidal space increases to the south
    double tmp = spatialRange.y2;
    spatialRange.y2 = spatialRange.y1;
    spatialRange.y1 = tmp;
    // Find the range of cells in MODIS Sinusoidal grid overlapping the range
    // Lower bounds are floored and upper bounds are ceiled, giving half-open ranges
    // [h1, h2) and [v1, v2) as used by the filter below.
    final int h1 = (int) Math.floor(spatialRange.x1);
    final int h2 = (int) Math.ceil(spatialRange.x2);
    final int v1 = (int) Math.floor(spatialRange.y1);
    final int v2 = (int) Math.ceil(spatialRange.y2);
    // Accepts files whose name matches MODISTileID and whose h/v groups fall in the cell range.
    PathFilter rangeFilter = new PathFilter() {
        public boolean accept(Path p) {
            Matcher matcher = MODISTileID.matcher(p.getName());
            if (!matcher.matches())
                return false;
            int h = Integer.parseInt(matcher.group(1));
            int v = Integer.parseInt(matcher.group(2));
            return h >= h1 && h < h2 && v >= v1 && v < v2;

    final Vector<Path> allMatchingFiles = new Vector<Path>();

    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        // The loop body is not visible here; presumably each path is added to allMatchingFiles.
        for (FileStatus matchingFile : matchingFiles) {

    //noinspection SizeReplaceableByIsEmpty
    if (allMatchingFiles.isEmpty())
        return null;

    // The resolution is read from the first matching file and applied to all of them.
    final int resolution = AggregateQuadTree.getResolution(fs, allMatchingFiles.get(0));

    // 3- Query all matching files in parallel
    // Each worker handles the index range [i1, i2) of allMatchingFiles.
    List<Node> threadsResults = Parallel.forEach(allMatchingFiles.size(),
            new RunnableRange<AggregateQuadTree.Node>() {
                public Node run(int i1, int i2) {
                    Node threadResult = new AggregateQuadTree.Node();
                    for (int i_file = i1; i_file < i2; i_file++) {
                        Path matchingFile = allMatchingFiles.get(i_file);
                        try {
                            Matcher matcher = MODISTileID.matcher(matchingFile.getName());
                            matcher.matches(); // It has to match
                            int h = Integer.parseInt(matcher.group(1));
                            int v = Integer.parseInt(matcher.group(2));
                            // Clip the query region and normalize in this tile
                            // Coordinates are clamped to [0, 1] within the tile and scaled by the resolution.
                            Rectangle translated = spatialRange.translate(-h, -v);
                            int x1 = (int) (Math.max(translated.x1, 0) * resolution);
                            int y1 = (int) (Math.max(translated.y1, 0) * resolution);
                            int x2 = (int) (Math.min(translated.x2, 1.0) * resolution);
                            int y2 = (int) (Math.min(translated.y2, 1.0) * resolution);
                            // NOTE(review): fileResult is not merged into threadResult in the
                            // visible code -- presumably that statement was lost; confirm.
                            AggregateQuadTree.Node fileResult = AggregateQuadTree.aggregateQuery(fs,
                                    matchingFile, new java.awt.Rectangle(x1, y1, (x2 - x1), (y2 - y1)));
                        } catch (Exception e) {
                            throw new RuntimeException("Error reading file " + matchingFile, e);
                    return threadResult;
    // Combine the per-thread results; the loop body is not visible in this excerpt.
    AggregateQuadTree.Node finalResult = new AggregateQuadTree.Node();
    for (Node threadResult : threadsResults) {
    // Record how many files this request touched.
    numOfTreesTouchesInLastRequest = allMatchingFiles.size();
    return finalResult;

From source file:edu.umn.cs.spatialHadoop.nasa.SpatioAggregateQueries.java

License:Open Source License

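/**
 * Performs a spatio-temporal selection query for a single point on an indexed directory
 * @param inFile
 * @param output collector for matching values; may be {@code null}
 * @param params query parameters; the query location is read from the "point" shape
 * @return the sum of the per-thread result counts
 * @throws ParseException
 * @throws IOException
 * @throws InterruptedException
 */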
public static long selectionQuery(Path inFile, final ResultCollector<NASAPoint> output, OperationsParams params)
        throws ParseException, IOException, InterruptedException {
    // NOTE(review): this listing appears to have lost lines during extraction (closing
    // braces and possibly short statements). Comments describe only what is visible.
    // 1- Find matching temporal partitions
    final FileSystem fs = inFile.getFileSystem(params);
    Vector<Path> matchingPartitions = selectTemporalPartitions(inFile, params);

    // 2- Find the matching tile and the position in that tile
    final Point queryPoint = (Point) params.getShape("point");
    // Keep the user's original coordinates, because queryPoint is overwritten in place below.
    final double userQueryLon = queryPoint.x;
    final double userQueryLat = queryPoint.y;
    // Convert query point from lat/lng space to Sinusoidal space
    // x is scaled by cos(latitude), then both coordinates are mapped to tile units
    // (x shifted by 180, y measured down from 90, both divided by 10).
    double cosPhiRad = Math.cos(queryPoint.y * Math.PI / 180);
    double projectedX = queryPoint.x * cosPhiRad;
    queryPoint.x = (projectedX + 180.0) / 10.0;
    queryPoint.y = (90.0 - queryPoint.y) / 10.0;
    // The integer parts select the tile; its identifier has the form hXXvYY.
    final int h = (int) Math.floor(queryPoint.x);
    final int v = (int) Math.floor(queryPoint.y);
    final String tileID = String.format("h%02dv%02d", h, v);
    // Accepts files whose name contains the tile identifier.
    PathFilter rangeFilter = new PathFilter() {
        public boolean accept(Path p) {
            return p.getName().indexOf(tileID) >= 0;

    final Vector<Path> allMatchingFiles = new Vector<Path>();

    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        // The loop body is not visible here; presumably each path is added to allMatchingFiles.
        for (FileStatus matchingFile : matchingFiles) {

    // All matching files are supposed to have the same resolution
    // NOTE(review): no emptiness check on allMatchingFiles is visible before get(0) -- confirm.
    final int resolution = AggregateQuadTree.getResolution(fs, allMatchingFiles.get(0));

    // Position of the query inside the tile: the fractional tile coordinate times the resolution, floored.
    final java.awt.Point queryInMatchingTile = new java.awt.Point();
    queryInMatchingTile.x = (int) Math.floor((queryPoint.x - h) * resolution);
    queryInMatchingTile.y = (int) Math.floor((queryPoint.y - v) * resolution);

    // 3- Query all matching files in parallel
    // Each worker handles the index range [i1, i2) of allMatchingFiles.
    List<Long> threadsResults = Parallel.forEach(allMatchingFiles.size(), new RunnableRange<Long>() {
        public Long run(int i1, int i2) {
            // When a collector was supplied, wrap it with an adapter that copies each value
            // and timestamp into a NASAPoint located at the user's original coordinates.
            // The forwarding call to `output` is not visible in this excerpt.
            ResultCollector<AggregateQuadTree.PointValue> internalOutput = output == null ? null
                    : new ResultCollector<AggregateQuadTree.PointValue>() {
                        NASAPoint middleValue = new NASAPoint(userQueryLon, userQueryLat, 0, 0);

                        public void collect(AggregateQuadTree.PointValue value) {
                            middleValue.value = value.value;
                            middleValue.timestamp = value.timestamp;

            // NOTE(review): numOfResults is never updated in the visible code -- confirm
            // whether the full source accumulates the query's return value.
            long numOfResults = 0;
            for (int i_file = i1; i_file < i2; i_file++) {
                try {
                    Path matchingFile = allMatchingFiles.get(i_file);
                    // A 1x1 rectangle at the query position within the tile.
                    java.awt.Rectangle query = new java.awt.Rectangle(queryInMatchingTile.x,
                            queryInMatchingTile.y, 1, 1);
                    AggregateQuadTree.selectionQuery(fs, matchingFile, query, internalOutput);
                // The handler body is not visible in this excerpt.
                } catch (IOException e) {
            return numOfResults;
    // Sum the per-thread counts.
    long totalResults = 0;
    for (long result : threadsResults) {
        totalResults += result;
    return totalResults;

From source file:edu.umn.cs.spatialHadoop.nasa.SpatioTemporalAggregateQuery.java

License:Open Source License

/**
 * Performs a spatio-temporal aggregate query on an indexed directory
 * @param inFile
 * @param params
 * @throws ParseException
 * @throws IOException
 */
public static AggregateQuadTree.Node aggregateQuery(Path inFile, OperationsParams params)
        throws ParseException, IOException {
    // Overview: (1) collect the temporal partitions under inFile that are
    // contained in the "time" parameter, (2) list the tile files in those
    // partitions whose h/v id overlaps the "rect" parameter, (3) run
    // AggregateQuadTree.aggregateQuery on every such file through
    // Parallel.forEach. The value returned to the caller is finalResult.

    // 1- Run a temporal filter step to find all matching temporal partitions
    Vector<Path> matchingPartitions = new Vector<Path>();
    // List of time ranges to check. Initially it contains one range as
    // specified by the user. Eventually, it can be split into at most two
    // partitions if partially matched by a partition.
    Vector<TimeRange> temporalRanges = new Vector<TimeRange>();
    temporalRanges.add(new TimeRange(params.get("time")));
    // Candidate index directories, listed from coarsest (yearly) to finest
    // (daily); the loop below starts at position 0.
    Path[] temporalIndexes = new Path[] { new Path(inFile, "yearly"), new Path(inFile, "monthly"),
            new Path(inFile, "daily") };
    int index = 0;
    final FileSystem fs = inFile.getFileSystem(params);
    // Stop as soon as every requested range has been covered or there is no
    // index directory left to try.
    // NOTE(review): no statement advancing `index` is visible in this
    // excerpt - confirm it is incremented once per pass of this loop.
    while (index < temporalIndexes.length && !temporalRanges.isEmpty()) {
        Path indexDir = temporalIndexes[index];
        LOG.info("Checking index dir " + indexDir);
        TemporalIndex temporalIndex = new TemporalIndex(fs, indexDir);
        for (int iRange = 0; iRange < temporalRanges.size(); iRange++) {
            TimeRange range = temporalRanges.get(iRange);
            TemporalPartition[] matches = temporalIndex.selectContained(range.start, range.end);
            // assumes selectContained returns null (not an empty array) when
            // nothing matches, since matches[0] is read below - TODO confirm
            if (matches != null) {
                LOG.info("Matched " + matches.length + " partitions in " + indexDir);
                for (TemporalPartition match : matches) {
                    LOG.info("Matched temporal partition: " + match.dirName);
                    matchingPartitions.add(new Path(indexDir, match.dirName));
                // Update range to remove matching part
                // Only the first and last match are inspected, so the matches
                // are presumably ordered by time and contiguous - verify
                // against TemporalIndex.selectContained.
                TemporalPartition firstMatch = matches[0];
                TemporalPartition lastMatch = matches[matches.length - 1];
                if (range.start < firstMatch.start && range.end > lastMatch.end) {
                    // Need to split the range into two
                    // The leading remainder replaces the current slot; the
                    // trailing remainder is then inserted at the same position,
                    // which shifts the leading remainder to iRange + 1.
                    // NOTE(review): the next iteration therefore re-checks the
                    // leading remainder, while the trailing one (now at iRange)
                    // is not revisited against this index - confirm intended.
                    temporalRanges.setElementAt(new TimeRange(range.start, firstMatch.start), iRange);
                    temporalRanges.insertElementAt(new TimeRange(lastMatch.end, range.end), iRange);
                } else if (range.start < firstMatch.start) {
                    // Update range in-place
                    // `range` is the object stored in temporalRanges, so this
                    // mutation shrinks the stored range to the uncovered head.
                    range.end = firstMatch.start;
                } else if (range.end > lastMatch.end) {
                    // Update range in-place
                    // Same in-place mutation, keeping only the uncovered tail.
                    range.start = lastMatch.end;
                } else {
                    // Current range was completely covered. Remove it
                    // NOTE(review): the removal statement itself is not visible
                    // in this excerpt; removing at iRange inside this indexed
                    // loop would also shift later elements - confirm handling.

    // Store how many temporal partitions were selected; the variable is
    // declared outside this method.
    numOfTemporalPartitionsInLastQuery = matchingPartitions.size();

    // 2- Find all matching files (AggregateQuadTrees) in matching partitions
    // The rectangle obtained here is modified in place by the conversions below.
    final Rectangle spatialRange = params.getShape("rect", new Rectangle()).getMBR();
    // Convert spatialRange from lat/lng space to Sinusoidal space
    // Each longitude is scaled by cos(latitude), evaluated at both the y1 and
    // the y2 edge. The variable names suggest y1 is the southern edge and y2
    // the northern one - TODO confirm the Rectangle convention.
    double cosPhiRad = Math.cos(spatialRange.y1 * Math.PI / 180);
    double southWest = spatialRange.x1 * cosPhiRad;
    double southEast = spatialRange.x2 * cosPhiRad;
    cosPhiRad = Math.cos(spatialRange.y2 * Math.PI / 180);
    double northWest = spatialRange.x1 * cosPhiRad;
    double northEast = spatialRange.x2 * cosPhiRad;
    // Keep the widest horizontal extent of the two edges.
    spatialRange.x1 = Math.min(northWest, southWest);
    spatialRange.x2 = Math.max(northEast, southEast);
    // Convert to the h v space used by MODIS
    // x is shifted by 180 and y is measured down from 90; both are divided by
    // 10, so one unit in the new space spans 10 degrees.
    spatialRange.x1 = (spatialRange.x1 + 180.0) / 10.0;
    spatialRange.x2 = (spatialRange.x2 + 180.0) / 10.0;
    spatialRange.y2 = (90.0 - spatialRange.y2) / 10.0;
    spatialRange.y1 = (90.0 - spatialRange.y1) / 10.0;
    // Vertically flip because the Sinusoidal space increases to the south
    // (90 - y) reverses the ordering of y1 and y2, so they are swapped back.
    double tmp = spatialRange.y2;
    spatialRange.y2 = spatialRange.y1;
    spatialRange.y1 = tmp;
    // Find the range of cells in MODIS Sinusoidal grid overlapping the range
    // Lower bounds are rounded down and upper bounds rounded up; the filter
    // below treats h2 and v2 as exclusive.
    final int h1 = (int) Math.floor(spatialRange.x1);
    final int h2 = (int) Math.ceil(spatialRange.x2);
    final int v1 = (int) Math.floor(spatialRange.y1);
    final int v2 = (int) Math.ceil(spatialRange.y2);
    // Accepts only paths whose name matches MODISTileID (a pattern defined
    // outside this method) and whose captured h (group 1) and v (group 2)
    // fall inside [h1, h2) x [v1, v2).
    PathFilter rangeFilter = new PathFilter() {
        public boolean accept(Path p) {
            Matcher matcher = MODISTileID.matcher(p.getName());
            if (!matcher.matches())
                return false;
            int h = Integer.parseInt(matcher.group(1));
            int v = Integer.parseInt(matcher.group(2));
            return h >= h1 && h < h2 && v >= v1 && v < v2;

    // Paths of every tile file to be queried; read by the worker code in step 3.
    final Vector<Path> allMatchingFiles = new Vector<Path>();

    for (Path matchingPartition : matchingPartitions) {
        // Select all matching files
        FileStatus[] matchingFiles = fs.listStatus(matchingPartition, rangeFilter);
        // NOTE(review): the body of the loop below is not visible in this
        // excerpt; presumably it adds each file's path to allMatchingFiles.
        for (FileStatus matchingFile : matchingFiles) {

    // 3- Query all matching files in parallel
    // run(i1, i2) handles the files at positions i1 (inclusive) to i2
    // (exclusive) and returns one Node for that slice; how slices are
    // assigned is decided by Parallel.forEach, which is not shown here.
    Vector<Node> threadsResults = Parallel.forEach(allMatchingFiles.size(),
            new RunnableRange<AggregateQuadTree.Node>() {
                public Node run(int i1, int i2) {
                    Node threadResult = new AggregateQuadTree.Node();
                    for (int i_file = i1; i_file < i2; i_file++) {
                        try {
                            Path matchingFile = allMatchingFiles.get(i_file);
                            Matcher matcher = MODISTileID.matcher(matchingFile.getName());
                            // matches() must run before group() can be read;
                            // its result is ignored here.
                            matcher.matches(); // It has to match
                            int h = Integer.parseInt(matcher.group(1));
                            int v = Integer.parseInt(matcher.group(2));
                            // Clip the query region and normalize in this tile
                            // After translating by (-h, -v) the tile occupies
                            // [0, 1] on both axes; the bounds are clamped to
                            // that interval, multiplied by 1200 and truncated
                            // to int.
                            // NOTE(review): 1200 is presumably the number of
                            // cells per tile side - confirm it holds for every
                            // dataset queried through this method.
                            Rectangle translated = spatialRange.translate(-h, -v);
                            int x1 = (int) (Math.max(translated.x1, 0) * 1200);
                            int y1 = (int) (Math.max(translated.y1, 0) * 1200);
                            int x2 = (int) (Math.min(translated.x2, 1.0) * 1200);
                            int y2 = (int) (Math.min(translated.y2, 1.0) * 1200);
                            // The query rectangle is given as origin plus
                            // width and height.
                            // NOTE(review): no statement combining fileResult
                            // into threadResult is visible in this excerpt.
                            AggregateQuadTree.Node fileResult = AggregateQuadTree.aggregateQuery(fs,
                                    matchingFile, new java.awt.Rectangle(x1, y1, (x2 - x1), (y2 - y1)));
                        // NOTE(review): the handler body is not visible in this
                        // excerpt - confirm the failure is reported rather than
                        // silently dropped.
                        } catch (IOException e) {
                    return threadResult;
    AggregateQuadTree.Node finalResult = new AggregateQuadTree.Node();
    // NOTE(review): no statement folding threadResult into finalResult is
    // visible in this excerpt; as the text stands, the assignment on the next
    // code line is the body of this loop.
    for (Node threadResult : threadsResults)
    // Store how many files were selected for querying; the variable is
    // declared outside this method.
    numOfTreesTouchesInLastRequest = allMatchingFiles.size();
    return finalResult;