Example usage for org.apache.hadoop.fs FileStatus getModificationTime

List of usage examples for org.apache.hadoop.fs FileStatus getModificationTime

Introduction

This page lists example usages of org.apache.hadoop.fs.FileStatus.getModificationTime().

Prototype

public long getModificationTime() 

Document

Get the modification time of the file.
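
Before the longer examples, here is a minimal, self-contained sketch of the basic call pattern. It is an illustration rather than an excerpt from the source files below, and it assumes the configured default filesystem is reachable and that the path used (/tmp/example.txt) exists.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; replace with a file that exists on your filesystem
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));

        // getModificationTime() returns milliseconds since the epoch,
        // directly comparable with System.currentTimeMillis()
        long mtime = status.getModificationTime();
        long ageMillis = System.currentTimeMillis() - mtime;

        System.out.println("Modified at " + mtime + " (" + ageMillis + " ms ago)");
    }
}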

Usage

From source file:com.stumbleupon.hbaseadmin.ClusterUtils.java

License:Open Source License

/**
 * Remove any regions that do not qualify for compaction
 * @param admin The hbase admin
 * @param serverName The server name
 * @param server The HRegion interface
 * @return The filtered regions
 * @throws IOException 
 */
private HRegionInfo getNextEligibleRegion(HBaseAdmin admin, ServerName serverName, HRegionInterface server)
        throws IOException {
    HRegionInfo ret = null;
    List<HRegionInfo> onlineRegions = server.getOnlineRegions();
    String hostport = serverName.getHostAndPort();
    HServerLoad serverLoad = clusterStatus.getLoad(serverName);

    if (serverLoad == null) {
        LOG.warn("Skipping server {} because could not get server load", hostport);
    } else {
        List<String> tableNames = compact.getTableNames();
        boolean excludeFromList = compact.getExcludeTables();
        Map<byte[], RegionLoad> regionLoadMap = serverLoad.getRegionsLoad();
        List<String> reasons = new ArrayList<String>();

        for (HRegionInfo region : onlineRegions) {
            String regionName = region.getRegionNameAsString();
            String tableName = region.getTableNameAsString();
            reasons.clear();

            // Ignore any regions in tables that are marked as excluded
            if (tableNames.size() > 0) {
                if (excludeFromList && tableNames.contains(tableName)) {
                    continue;
                } else if (!excludeFromList && !tableNames.contains(tableName)) {
                    continue;
                } else if (LOG.isDebugEnabled()) {
                    reasons.add(hostport + " [" + regionName + "] qualifies because its table '" + tableName
                            + "' has NOT been excluded");
                }
            }

            // Ignore any regions that we have already visited/compacted
            if (visitedRegions.isRegionVisited(hostport, regionName)) {
                continue;
            } else if (LOG.isDebugEnabled()) {
                reasons.add(hostport + " [" + regionName + "] qualifies because it has NOT been visited");
            }

            // Remove any regions that do not have enough store files to qualify for compaction
            RegionLoad regionLoad = regionLoadMap.get(region.getRegionName());
            boolean isRegionEligible = true;

            if (regionLoad == null) {
                LOG.warn("Could not get region load for '{}'. Skipping region...", regionName);
                continue;
            } else {
                try {
                    int numFamilies = getTableDescriptor(admin, region).getColumnFamilies().length;
                    int numRegionStoreFiles = regionLoad.getStorefiles();
                    int minStoreFilesNeeded = compact.getNumStoreFiles() * numFamilies;

                    if (numRegionStoreFiles >= minStoreFilesNeeded) {
                        isRegionEligible = true;

                        if (LOG.isDebugEnabled()) {
                            reasons.add(hostport + " [" + regionName + "] qualifies because it has a total of "
                                    + numRegionStoreFiles + " store files in " + numFamilies + " families");
                        }
                    } else {
                        if (LOG.isDebugEnabled()) {
                            reasons.add(hostport + " [" + regionName
                                    + "] does not qualify because it has a total of " + numRegionStoreFiles
                                    + " store files in " + numFamilies + " families. Needs at least "
                                    + minStoreFilesNeeded);
                        }

                        isRegionEligible = false;
                    }
                } catch (TableNotFoundException e) {
                    LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName,
                            e);
                    continue;
                } catch (IOException e) {
                    LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName,
                            e);
                    continue;
                } catch (Exception e) {
                    LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName,
                            e);
                    continue;
                }
            }

            // If enabled, force compaction of any regions that contain store files older than maxStoreFileAge 
            if (!isRegionEligible && compact.getMaxStoreFileAge() > 0) {
                List<String> files = server.getStoreFileList(region.getRegionName());
                FileSystem fs = FileSystem.get(admin.getConfiguration());

                if (files != null) {
                    Path[] filePaths = new Path[files.size()];
                    for (int i = 0; i < files.size(); i++) {
                        filePaths[i] = new Path(files.get(i));
                    }

                    long maxStoreFileAge = compact.getMaxStoreFileAge();
                    long now = System.currentTimeMillis();
                    FileStatus[] storeFilesStatus = fs.listStatus(filePaths);

                    for (FileStatus fileStatus : storeFilesStatus) {
                        long storeFileAge = now - fileStatus.getModificationTime();

                        if (storeFileAge > maxStoreFileAge) {
                            isRegionEligible = true;

                            if (LOG.isDebugEnabled()) {
                                reasons.add(hostport + " [" + regionName + "] forced to qualify because "
                                        + "at least one store file is older than the specified maxStoreFileAge");
                            }

                            break;
                        }
                    }
                }
            }

            if (isRegionEligible) {
                if (reasons.size() > 0) {
                    for (String reason : reasons) {
                        LOG.debug(reason);
                    }
                }

                ret = region;
                break;
            }
        }
    }

    return ret;
}

From source file:com.sun.kohsuke.hadoop.importer.App.java

License:Open Source License

public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Usage: java -jar importer.jar [HDFS URL] [local directory] [HDFS directory]");
        System.exit(-1);
    }

    Configuration conf = new Configuration();
    conf.set("fs.default.name", args[0]);
    DFSClient dfs = new DFSClient(conf);

    File in = new File(args[1]);
    String out = args[2];

    File[] children = in.listFiles(new FileFilter() {
        public boolean accept(File child) {
            return child.isFile();
        }
    });
    if (children == null) {
        System.out.println("No such directory exists: " + in);
        System.exit(-1);
    }
    int cnt = 1;
    for (File f : children) {
        String dest = out + '/' + f.getName();
        FileStatus i = dfs.getFileInfo(dest);
        if (i == null || i.getModificationTime() != f.lastModified() || i.getLen() != f.length()) {
            System.out.printf("(%d/%d) Importing %s\n", cnt, children.length, f);
            try {
                IOUtils.copyBytes(new FileInputStream(f), dfs.create(dest, true), conf);
                dfs.setTimes(dest, f.lastModified(), f.lastModified());
            } catch (RemoteException e) {
                // failure to create
                e.printStackTrace();
            }
        } else {
            System.out.printf("(%d/%d) Skipping %s\n", cnt, children.length, f);
        }
        cnt++;
    }
}

From source file:com.talis.hadoop.rdf.ZipUtils.java

License:Apache License

/**
 * Write a file to a zip output stream, removing leading path name components
 * from the actual file name when creating the zip file entry.
 *
 * The entry placed in the zip file is <code>baseName</code>/
 * <code>relativePath</code>, where <code>relativePath</code> is constructed
 * by removing a leading <code>root</code> from the path for
 * <code>itemToZip</code>.
 * 
 * If <code>itemToZip</code> is an empty directory, it is ignored. If
 * <code>itemToZip</code> is a directory, the contents of the directory are
 * added recursively.
 * 
 * @param zos The zip output stream
 * @param baseName The base name to use for the file name entry in the zip
 *        file
 * @param root The path to remove from <code>itemToZip</code> to make a
 *        relative path name
 * @param itemToZip The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info("zipDirectory: {} {} {}", new Object[] { baseName, root, itemToZip });
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);

    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Include empty files in case a placeholder entry is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Entry comments just clutter the zip listing, so leave them out
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);

        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        // Guard against a failed open leaving 'in' null
        if (in != null) {
            in.close();
        }
    }

}

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.HighWaterMarkInputFormat.java

License:Apache License

@Nonnull
@Override
public List<FileStatus> listStatus(@Nonnull final JobContext job) throws IOException {
    // Get job configuration
    long highWaterMark = Math.max(lastHighWaterMark, getHighWaterMark(job));
    final long maxAge = HighWaterMarkInputFormat.getMaxFileAge(job);
    final long minAge = HighWaterMarkInputFormat.getMinFileAge(job);

    if (minAge > maxAge) {
        throw new IOException(MIN_FILE_AGE + " cannot be greater than " + MAX_FILE_AGE);
    }

    // List and filter files
    final List<FileStatus> allFiles = super.listStatus(job);
    final List<FileStatus> jobFiles = new ArrayList<>(allFiles.size());
    final long currentTime = currentTimeMillis();

    for (final FileStatus file : allFiles) {
        final long fileTime = file.getModificationTime();
        final long fileAge = currentTime - fileTime;

        if (!file.isDirectory() && fileAge >= minAge && fileAge <= maxAge && fileTime > highWaterMark) {
            jobFiles.add(file);

            if (fileTime > lastHighWaterMark) {
                lastHighWaterMark = fileTime;
            }
        }
    }

    lastHighWaterMark = Math.max(lastHighWaterMark, highWaterMark);
    return jobFiles;
}

From source file:com.toy.Client.java

License:Apache License

private static void registerLocalResource(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, Path src) throws IOException {
    String pathSuffix = Constants.TOY_PREFIX + appId.toString() + "/" + src.getName();
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    LOG.info("Copy {} from local filesystem to {} and add to local environment", src.getName(), dst.toUri());
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(src.getName(), amJarRsrc);
}

From source file:com.toy.Client.java

License:Apache License

private void uploadDepAndRegister(Map<String, LocalResource> localResources, ApplicationId appId, FileSystem fs,
        String depname) throws IOException {
    File dep = new File(depname);
    if (!dep.exists())
        throw new IOException(dep.getAbsolutePath() + " does not exist");
    Path dst = new Path(fs.getHomeDirectory(), Constants.TOY_PREFIX + appId.toString() + "/" + dep.getName());
    LOG.info("Copy {} from local filesystem to {} and add to local environment", dep.getName(), dst.toUri());
    FileInputStream input = new FileInputStream(dep);
    final FSDataOutputStream outputStream = fs.create(dst, true);
    ByteStreams.copy(input, outputStream);
    input.close();
    outputStream.close();
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    FileStatus destStatus = fs.getFileStatus(dst);
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(dep.getName(), amJarRsrc);

}

From source file:com.trendmicro.hdfs.webdav.HDFSResource.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void populateProperties() {
    if (properties != null) {
        return;
    }
    properties = new DavPropertySet();
    FileStatus stat = null;
    try {
        stat = user.doAs(new PrivilegedExceptionAction<FileStatus>() {
            public FileStatus run() throws Exception {
                return FileSystem.get(conf).getFileStatus(getPath());
            }
        });
    } catch (IOException ex) {
        LOG.warn(StringUtils.stringifyException(ex));
    } catch (InterruptedException e) {
        LOG.warn(StringUtils.stringifyException(e));
    }
    if (stat != null) {
        properties.add(new DefaultDavProperty(DavPropertyName.GETCONTENTLENGTH, stat.getLen()));
        SimpleDateFormat simpleFormat = (SimpleDateFormat) DavConstants.modificationDateFormat.clone();
        simpleFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        Date date = new Date(stat.getModificationTime());
        properties.add(new DefaultDavProperty(DavPropertyName.GETLASTMODIFIED, simpleFormat.format(date)));
        properties.add(new DefaultDavProperty(SecurityConstants.OWNER, stat.getOwner()));
        properties.add(new DefaultDavProperty(SecurityConstants.GROUP, stat.getGroup()));
        // TODO: Populate DAV property SecurityConstants.CURRENT_USER_PRIVILEGE_SET
    }
    if (getDisplayName() != null) {
        properties.add(new DefaultDavProperty(DavPropertyName.DISPLAYNAME, getDisplayName()));
    }
    if (isCollection()) {
        properties.add(new ResourceType(ResourceType.COLLECTION));
        // Windows XP support
        properties.add(new DefaultDavProperty(DavPropertyName.ISCOLLECTION, "1"));
    } else {
        properties.add(new ResourceType(ResourceType.DEFAULT_RESOURCE));
        // Windows XP support
        properties.add(new DefaultDavProperty(DavPropertyName.ISCOLLECTION, "0"));
    }
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's local filesystem
 * preservePath: path under which existing local files are preserved
 * fs: FileSystem object from HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns: date of the last file backed up if the size limit was
 * reached; otherwise, zero
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                // continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err
                                            .println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println(
                                        "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);

                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);

            return 0;
        }
    }

    return 0;
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to walk the HDFS filesystem depth-first to find all files.
 *
 * fs: FileSystem object from HDFS
 * minDate: oldest date for files to be backed up
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 * pathList: will be filled with all files under p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables.  We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates.  is the file too old?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);

            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to back up
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java

License:Apache License

/**
 * Method to walk the HDFS filesystem depth-first to find all files.
 *
 * fs: FileSystem object from HDFS
 * p: path in HDFS to look for files
 * sLocalPathRoot: root of the local backup tree to verify against
 * maxDate: newest date for files to be backed up
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables.  We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size.  The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}