List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
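For orientation before the examples: getName() returns the final component of the path, i.e. everything after the last '/', which is why the code below uses it for extension checks and name-based filtering. A minimal sketch; the path strings are made up for illustration:

import org.apache.hadoop.fs.Path;

public class GetNameDemo {
    public static void main(String[] args) {
        // getName() keeps only the last path component
        System.out.println(new Path("/user/data/MOD11A1.h08v05.hdf").getName()); // MOD11A1.h08v05.hdf
        System.out.println(new Path("hdfs://namenode:8020/user/data").getName()); // data

        // Common idiom in the examples below: a case-insensitive extension check
        Path p = new Path("/imagery/scene.HDF");
        System.out.println(p.getName().toLowerCase().endsWith(".hdf")); // true
    }
}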
From source file: edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java
License: Open Source License
@Override
protected boolean isSplitable(FileSystem fs, Path file) {
    // HDF files are not splittable
    if (file.getName().toLowerCase().endsWith(".hdf"))
        return false;
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec != null && !(codec instanceof SplittableCompressionCodec))
        return false;
    try {
        // To avoid opening the file and checking the first 8 bytes to look for
        // an R-tree signature, we never split a file read over HTTP
        if (fs instanceof HTTPFileSystem)
            return false;
        // ... and never split a file smaller than 150MB, which performs better
        // with many small files
        if (fs.getFileStatus(file).getLen() < 150 * 1024 * 1024)
            return false;
        return !SpatialSite.isRTree(fs, file);
    } catch (IOException e) {
        return super.isSplitable(fs, file);
    }
}
From source file: edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java
License: Open Source License
@Override
protected boolean isSplitable(JobContext context, Path file) {
    try {
        // Create compressionCodecs to be used by the isSplitable method
        if (compressionCodecs == null)
            compressionCodecs = new CompressionCodecFactory(context.getConfiguration());
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        // HDF files are not splittable
        if (file.getName().toLowerCase().endsWith(".hdf"))
            return false;
        final CompressionCodec codec = compressionCodecs.getCodec(file);
        if (codec != null && !(codec instanceof SplittableCompressionCodec))
            return false;
        // To avoid opening the file and checking the first 8 bytes to look for
        // an R-tree signature, we never split a file read over HTTP
        if (fs instanceof HTTPFileSystem)
            return false;
        // ... and never split a file smaller than 150MB, which performs better
        // with many small files
        if (fs.getFileStatus(file).getLen() < 150 * 1024 * 1024)
            return false;
        return !SpatialSite.isRTree(fs, file);
    } catch (IOException e) {
        LOG.warn("Error while determining whether a file is splittable", e);
        return false; // Safer not to split it
    }
}
From source file: edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License: Open Source License
/**
 * Creates a full spatio-temporal hierarchy for a source folder.
 * @throws ParseException
 * @throws InterruptedException
 */
public static void directoryIndexer(final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    Path inputDir = params.getInputPath();
    FileSystem sourceFs = inputDir.getFileSystem(params);
    final Path sourceDir = inputDir.makeQualified(sourceFs);
    Path destDir = params.getOutputPath();
    final FileSystem destFs = destDir.getFileSystem(params);
    TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null;

    // Create daily indexes that do not exist
    final Path dailyIndexDir = new Path(destDir, "daily");
    FileStatus[] matchingDays = timeRange == null ? sourceFs.listStatus(inputDir)
            : sourceFs.listStatus(inputDir, timeRange);
    final Vector<Path> sourceFiles = new Vector<Path>();
    for (FileStatus matchingDay : matchingDays) {
        for (FileStatus matchingTile : sourceFs.listStatus(matchingDay.getPath())) {
            sourceFiles.add(matchingTile.getPath());
        }
    }
    // Shuffle the array for better load balancing across threads
    Collections.shuffle(sourceFiles);
    final String datasetName = params.get("dataset");
    Parallel.forEach(sourceFiles.size(), new RunnableRange<Object>() {
        @Override
        public Object run(int i1, int i2) {
            LOG.info("Worker [" + i1 + "," + i2 + ") started");
            for (int i = i1; i < i2; i++) {
                Path sourceFile = sourceFiles.get(i);
                try {
                    Path relativeSourceFile = makeRelative(sourceDir, sourceFile);
                    Path destFilePath = new Path(dailyIndexDir, relativeSourceFile);
                    if (!destFs.exists(destFilePath)) {
                        LOG.info("Worker [" + i1 + "," + i2 + ") indexing: " + sourceFile.getName());
                        // Build the index into a unique temporary file first
                        Path tmpFile;
                        do {
                            tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                        } while (destFs.exists(tmpFile));
                        tmpFile = tmpFile.makeQualified(destFs);
                        if (datasetName == null)
                            throw new RuntimeException(
                                    "Please provide the name of the dataset you would like to index");
                        AggregateQuadTree.build(params, sourceFile, datasetName, tmpFile);
                        synchronized (destFs) {
                            Path destDir = destFilePath.getParent();
                            if (!destFs.exists(destDir))
                                destFs.mkdirs(destDir);
                        }
                        destFs.rename(tmpFile, destFilePath);
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error building an index for " + sourceFile, e);
                }
            }
            LOG.info("Worker [" + i1 + "," + i2 + ") finished");
            return null;
        }
    });
    LOG.info("Done generating daily indexes");

    // Merge daily indexes into monthly indexes
    Path monthlyIndexDir = new Path(destDir, "monthly");
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy.MM.dd");
    final SimpleDateFormat monthFormat = new SimpleDateFormat("yyyy.MM");
    mergeIndexes(destFs, dailyIndexDir, monthlyIndexDir, dayFormat, monthFormat, params);
    LOG.info("Done generating monthly indexes");

    // Merge monthly indexes into yearly indexes
    Path yearlyIndexDir = new Path(destDir, "yearly");
    final SimpleDateFormat yearFormat = new SimpleDateFormat("yyyy");
    mergeIndexes(destFs, monthlyIndexDir, yearlyIndexDir, monthFormat, yearFormat, params);
    LOG.info("Done generating yearly indexes");
}
From source file: edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License: Open Source License
/**
 * Merges a set of indexes into larger indexes.
 * @param fs
 * @param srcIndexDir
 * @param dstIndexDir
 * @param srcFormat
 * @param dstFormat
 * @param params
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 */
private static void mergeIndexes(final FileSystem fs, Path srcIndexDir, Path dstIndexDir,
        SimpleDateFormat srcFormat, SimpleDateFormat dstFormat, final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null;
    final FileStatus[] sourceIndexes = timeRange == null ? fs.listStatus(srcIndexDir)
            : fs.listStatus(srcIndexDir, timeRange);
    Arrays.sort(sourceIndexes); // Alphabetical sort acts as sort-by-date here

    // Scan the source indexes and merge each consecutive run belonging to the same unit
    int i1 = 0;
    while (i1 < sourceIndexes.length) {
        final String indexToCreate = dstFormat.format(srcFormat.parse(sourceIndexes[i1].getPath().getName()));
        int i2 = i1 + 1;
        // Keep scanning as long as the source index belongs to the same dest index
        while (i2 < sourceIndexes.length && dstFormat
                .format(srcFormat.parse(sourceIndexes[i2].getPath().getName())).equals(indexToCreate))
            i2++;

        // Merge all source indexes in the range [i1, i2) into one dest index.
        // Copy i1, i2 to final variables to make them accessible from threads
        final int firstIndex = i1;
        final int lastIndex = i2;
        final Path destIndex = new Path(dstIndexDir, indexToCreate);

        // For each tile, merge all values in all source indexes.
        // A regular expression to catch the tile identifier of a MODIS grid cell
        final Pattern MODISTileID = Pattern.compile("^.*(h\\d\\dv\\d\\d).*$");
        final FileStatus[] tilesInFirstDay = fs.listStatus(sourceIndexes[i1].getPath());
        // Shuffle the array for better load balancing across threads
        Random rand = new Random();
        for (int i = 0; i < tilesInFirstDay.length - 1; i++) {
            // Swap the entry at i with any entry at or after it (Fisher-Yates shuffle)
            int j = i + rand.nextInt(tilesInFirstDay.length - i);
            FileStatus temp = tilesInFirstDay[i];
            tilesInFirstDay[i] = tilesInFirstDay[j];
            tilesInFirstDay[j] = temp;
        }

        Parallel.forEach(tilesInFirstDay.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i_file1, int i_file2) {
                for (int i_file = i_file1; i_file < i_file2; i_file++) {
                    try {
                        FileStatus tileInFirstDay = tilesInFirstDay[i_file];

                        // Extract tile ID
                        Matcher matcher = MODISTileID.matcher(tileInFirstDay.getPath().getName());
                        if (!matcher.matches()) {
                            LOG.warn("Cannot extract tile id from file " + tileInFirstDay.getPath());
                            continue;
                        }
                        final String tileID = matcher.group(1);
                        Path destIndexFile = new Path(destIndex, tileID);

                        PathFilter tileFilter = new PathFilter() {
                            @Override
                            public boolean accept(Path path) {
                                return path.getName().contains(tileID);
                            }
                        };

                        // Find matching tiles in all source indexes to merge
                        Vector<Path> filesToMerge = new Vector<Path>(lastIndex - firstIndex);
                        filesToMerge.add(tileInFirstDay.getPath());
                        for (int iDailyIndex = firstIndex + 1; iDailyIndex < lastIndex; iDailyIndex++) {
                            FileStatus[] matchedTileFile = fs.listStatus(sourceIndexes[iDailyIndex].getPath(),
                                    tileFilter);
                            if (matchedTileFile.length == 0)
                                LOG.warn("Could not find tile " + tileID + " in dir "
                                        + sourceIndexes[iDailyIndex].getPath());
                            else if (matchedTileFile.length == 1)
                                filesToMerge.add(matchedTileFile[0].getPath());
                        }

                        if (fs.exists(destIndexFile)) {
                            // Destination file already exists. Compare the modification times of
                            // the destination and source files to see whether it needs to be
                            // updated or not
                            long destTimestamp = fs.getFileStatus(destIndexFile).getModificationTime();
                            boolean needsUpdate = false;
                            for (Path fileToMerge : filesToMerge) {
                                long sourceTimestamp = fs.getFileStatus(fileToMerge).getModificationTime();
                                if (sourceTimestamp > destTimestamp) {
                                    needsUpdate = true;
                                    break;
                                }
                            }
                            if (!needsUpdate)
                                continue;
                            LOG.info("Updating file " + destIndexFile.getName());
                        }

                        // Do the merge into a unique temporary file first
                        Path tmpFile;
                        do {
                            tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                        } while (fs.exists(tmpFile));
                        tmpFile = tmpFile.makeQualified(fs);
                        LOG.info("Merging tile " + tileID + " into file " + destIndexFile);
                        AggregateQuadTree.merge(params, filesToMerge.toArray(new Path[filesToMerge.size()]),
                                tmpFile);
                        synchronized (fs) {
                            Path destDir = destIndexFile.getParent();
                            if (!fs.exists(destDir))
                                fs.mkdirs(destDir);
                        }
                        fs.rename(tmpFile, destIndexFile);
                    } catch (IOException e) {
                        LOG.error("Error merging tile into " + destIndex, e);
                    }
                }
                return null;
            }
        });
        i1 = i2;
    }
}
From source file: edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License: Open Source License
/**
 * Makes a path relative to another path by removing all common ancestors.
 * @param parent
 * @param descendant
 * @return the path of descendant relative to parent
 */
private static Path makeRelative(Path parent, Path descendant) {
    Stack<String> components = new Stack<String>();
    while (descendant.depth() > parent.depth()) {
        components.push(descendant.getName());
        descendant = descendant.getParent();
    }
    if (!descendant.equals(parent))
        throw new RuntimeException("descendant not a child of parent");
    if (components.isEmpty())
        return new Path(".");
    Path relative = new Path(components.pop());
    while (!components.isEmpty())
        relative = new Path(relative, components.pop());
    return relative;
}
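A quick usage sketch of makeRelative; the paths are hypothetical and only illustrate how getName() peels off one component per iteration:

Path parent = new Path("/data/source");
Path descendant = new Path("/data/source/2000.02.24/h08v05.hdf");
// Components are pushed bottom-up and popped top-down,
// yielding the relative path "2000.02.24/h08v05.hdf"
Path relative = makeRelative(parent, descendant);
// makeRelative(parent, new Path("/other/dir")) would throw a RuntimeException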
From source file: edu.umn.cs.spatialHadoop.nasa.HDFPlot.java
License: Open Source License
public static Job plotHeatMap(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    if (params.get("shape") == null) {
        // Set the default shape value
        params.setClass("shape", NASARectangle.class, Shape.class);
    } else if (!(params.getShape("shape") instanceof NASAShape)) {
        System.err.println("The specified shape " + params.get("shape") + " is not an instance of NASAShape");
        System.exit(1);
    }
    if (params.get("mbr") == null) {
        // Set to the same value as the query rectangle or the whole world
        params.set("mbr", params.get("rect", "-180,-90,180,90"));
    }

    // Restrict to HDF files if working on a directory
    for (int i = 0; i < inFiles.length; i++) {
        if (!inFiles[i].getName().toLowerCase().endsWith(".hdf"))
            inFiles[i] = new Path(inFiles[i], "*.hdf");
    }

    String recover = params.get("recover", "none").toLowerCase();
    if (recover.equals("none")) {
        // Don't recover holes
        params.setBoolean("recoverholes", false);
    } else if (recover.equals("read")) {
        // Recover holes on read
        params.setBoolean("recoverholes", true);
    } else if (recover.equals("write")) {
        // Recover holes upon writing the final image
        params.setBoolean("recoverholes", false);
        if (params.get(PREPROCESSED_WATERMARK) == null) {
            OperationsParams params2 = new OperationsParams(params);
            params2.setBoolean("background", false);
            Path wmImage = new Path(outFile.getParent(), outFile.getName() + "_WaterMask");
            generateWaterMask(wmImage, params2);
            params.set(PREPROCESSED_WATERMARK, wmImage.toString());
        }
    }
    if (params.getBoolean("pyramid", false))
        return MultilevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params);
    return SingleLevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params);
}
From source file: edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License: Open Source License
/**
 * Recovers fill values in the array {@link Values}.
 * @param conf
 * @throws IOException
 */
private void recoverFillValues(Configuration conf) throws IOException {
    // For now, we can only recover values of type short
    HDFFile waterMaskFile = null;
    try {
        // Read the water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsByteArray();
            }
        }
        // Convert the water mask to a BitArray of the right size
        int size = 4800 / nasaDataset.resolution;
        BitArray waterMaskBits = convertWaterMaskToBits(ByteBuffer.wrap(waterMask), size);
        short fillValueShort = (short) HDFConstants.readAsInteger(fillValueBytes, 0, fillValueBytes.length);
        recoverXYShorts(ByteBuffer.wrap(unparsedDataArray), fillValueShort, waterMaskBits);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}
From source file: edu.umn.cs.spatialHadoop.nasa.HDFRecordReader3.java
License: Open Source License
/**
 * Recovers fill values in the array {@link Values}.
 * @param conf
 * @throws IOException
 */
private void recoverFillValues(Configuration conf) throws IOException {
    HDFFile waterMaskFile = null;
    try {
        // Read the water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsAnArray();
            }
        }
        // Stores which values have been recovered by copying a single value
        // without interpolation in the x-direction
        byte[] valueStatus = new byte[dataArray.length];
        recoverXDirection(waterMask, valueStatus);
        recoverYDirection(waterMask, valueStatus);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}
From source file: edu.umn.cs.spatialHadoop.nasa.HTTPFileSystem.java
License: Open Source License
/**
 * Returns the status of a file. This method is designed specifically to work
 * with the LP DAAC archive and will not work correctly with other web sites.
 * Since HTTP does not tell whether a URL points to a file or a directory,
 * we assume that URLs ending with HDF, XML and JPG are files while anything
 * else is considered a directory.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    f = f.makeQualified(this);
    URL url = f.toUri().toURL();
    int retryCount = HTTPFileSystem.retries;
    HttpURLConnection connection = null;
    try {
        while (connection == null && retryCount-- > 0) {
            try {
                connection = (HttpURLConnection) url.openConnection();
            } catch (java.net.SocketException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Trials left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            } catch (java.net.UnknownHostException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Trials left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            }
        }
        if (connection == null)
            throw new RuntimeException("Could not connect to " + f);
        String lengthStr = connection.getHeaderField("Content-Length");
        long length = lengthStr == null ? -1 : Long.parseLong(lengthStr);
        if (length == -1)
            LOG.info("Unknown HTTP file length for " + f);
        long modificationTime = connection.getLastModified();
        if (modificationTime == 0)
            modificationTime = connection.getDate();
        // Hard-coded to work with LP DAAC archives
        boolean isdir = !f.getName().matches("(?i:([^*\\?])*\\.(hdf|xml|jpg|gz|bz2|zip|txt|csv|tsv)$)");
        return new FileStatus(length, isdir, 1, BLOCK_SIZE, modificationTime, 0, null, null, null, f);
    } finally {
        if (connection != null)
            connection.disconnect();
    }
}
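To make the directory heuristic concrete, here is a minimal sketch of how the regex classifies sample names; the URLs are illustrative, not real archive entries:

// matches() anchors to the whole name, so only the final component's extension matters
String fileLikeRegex = "(?i:([^*\\?])*\\.(hdf|xml|jpg|gz|bz2|zip|txt|csv|tsv)$)";
Path hdf = new Path("http://e4ftl01.cr.usgs.gov/MOLT/MOD11A1.A2000065.h08v05.hdf");
Path dir = new Path("http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24");
System.out.println(!hdf.getName().matches(fileLikeRegex)); // false -> treated as a file
System.out.println(!dir.getName().matches(fileLikeRegex)); // true  -> treated as a directory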
From source file: edu.umn.cs.spatialHadoop.nasa.ImageCompare.java
License: Open Source License
/**
 * Compares two directories for similar images with matching names.
 * @param dir1
 * @param dir2
 * @throws IOException
 */
public static void compareFolders(Path dir1, Path dir2) throws IOException {
    final PathFilter png_filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().toLowerCase().endsWith(".png");
        }
    };
    // Retrieve all images in dir1
    FileStatus[] images1 = dir1.getFileSystem(new Configuration()).listStatus(dir1, png_filter);
    Map<String, Path> images1ByName = new HashMap<String, Path>();
    for (FileStatus fstatus : images1)
        images1ByName.put(fstatus.getPath().getName(), fstatus.getPath());
    // Retrieve all images in dir2
    FileStatus[] images2 = dir2.getFileSystem(new Configuration()).listStatus(dir2, png_filter);
    Map<String, Path> images2ByName = new HashMap<String, Path>();
    for (FileStatus fstatus : images2)
        images2ByName.put(fstatus.getPath().getName(), fstatus.getPath());

    final Vector<Double> similarities = new Vector<Double>();
    final Vector<String> names = new Vector<String>();
    // Compare every pair of images with matching names
    for (String imageName : images2ByName.keySet()) {
        Path image1 = images1ByName.get(imageName);
        if (image1 == null)
            continue;
        Path image2 = images2ByName.get(imageName);
        double similarity = compareImages(image1, image2);
        if (similarity > 0.1) {
            System.out.println(image1 + "," + image2 + "," + similarity);
            similarities.add(similarity);
            names.add(imageName);
        }
    }

    /*
    // Sort images by similarity
    IndexedSortable sortable = new IndexedSortable() {
        @Override
        public int compare(int i, int j) {
            double diff = similarities.get(i) - similarities.get(j);
            if (diff < 0)
                return -1;
            if (diff > 0)
                return 1;
            return 0;
        }

        @Override
        public void swap(int i, int j) {
            double tempSim = similarities.get(i);
            similarities.set(i, similarities.get(j));
            similarities.set(j, tempSim);
            String tempName = names.get(i);
            names.set(i, names.get(j));
            names.set(j, tempName);
        }
    };
    final IndexedSorter sorter = new QuickSort();
    sorter.sort(sortable, 0, names.size());

    final float threshold = 0.0f;
    // Display the matches, skipping the lowest `threshold` fraction
    for (int i = (int) (names.size() * threshold); i < names.size(); i++) {
        System.out.println(similarities.get(i) + " ... " + names.get(i));
    }
    */
}