Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
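
A minimal usage sketch before the full examples below (not taken from any of the source files listed here): list the entries under a directory and branch on isDirectory() to separate files from subdirectories. The path "/tmp/data" and the class name IsDirectoryExample are placeholders for illustration only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // List the entries directly under the directory (non-recursive).
        // "/tmp/data" is a placeholder; substitute a path that exists on your cluster.
        for (FileStatus status : fs.listStatus(new Path("/tmp/data"))) {
            if (status.isDirectory()) {
                System.out.println("dir:  " + status.getPath());
            } else {
                System.out.println("file: " + status.getPath());
            }
        }
    }
}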

Usage

From source file:com.blackberry.logdriver.admin.LogMaintenance.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    // For some reason, Oozie needs some options to be set as system properties
    // instead of in the configuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMR = getConfOrEnv(conf, "MAX_CONCURRENT_MR", "-1");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs", "true"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");
    String maxTotalMR = getConfOrEnv(conf, "MAX_TOTAL_MR", "-1");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set.  Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set.  Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set.  Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set.  Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative.  Not deleting.");
        doDelete = false;
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set.  Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set.  Using default value of 'service'.");
        rootDir = "/service";
    }

    // We can hang if this fails. So make sure we abort if it fails.
    fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.exists(new Path("/")); // Test if it works.
    } catch (IOException e) {
        LOG.error("Error getting filesystem.", e);
        return 1;
    }

    // Create the LockUtil instance
    lockUtil = new LockUtil(zkConnectString);

    // Now it's safe to create our Job Runner
    JobRunner jobRunner = new JobRunner(Integer.parseInt(maxConcurrentMR), Integer.parseInt(maxTotalMR));
    Thread jobRunnerThread = new Thread(jobRunner);
    jobRunnerThread.setName("JobRunner");
    jobRunnerThread.setDaemon(false);
    jobRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them
    // all.
    try {

        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth-first search of the directory, processing anything that
        // looks interesting along the way.
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug("    Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug("    Skipping, since it's not old enough.");

            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info("    Deleting empty directory {}", p.toString());
                fs.delete(p, true);

            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {

                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug("    Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug("    No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info("    Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                    dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties jobProps = new Properties();
                            jobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            jobProps.setProperty("jobType", "merge");
                            jobProps.setProperty("rootDir", rootDir);
                            jobProps.setProperty("dcNumber", dcNumber);
                            jobProps.setProperty("service", service);
                            jobProps.setProperty("date", matchDate);
                            jobProps.setProperty("hour", matchHour);
                            jobProps.setProperty("component", matchComponent);
                            jobProps.setProperty("user.name", userName);
                            jobProps.setProperty("logdir", logdir);

                            jobRunner.submit(jobProps);

                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {

                        Properties jobProps = new Properties();
                        jobProps.load(new FileInputStream(filterJobPropertiesFile));

                        jobProps.setProperty("jobType", "filter");
                        jobProps.setProperty("rootDir", rootDir);
                        jobProps.setProperty("dcNumber", dcNumber);
                        jobProps.setProperty("service", service);
                        jobProps.setProperty("date", matchDate);
                        jobProps.setProperty("hour", matchHour);
                        jobProps.setProperty("component", matchComponent);
                        jobProps.setProperty("user.name", userName);
                        jobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = jobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(
                                appPath + "/" + conf.get("filter.definition.file", service + ".yaml"));
                        LOG.info("Filter file is {}", filterFile);
                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                    fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(
                                        "    Got null when getting filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn("    Got no filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info("    Keeping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // Move files from data to archive
                                // delete it all!
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                        + "/" + matchDate + "/" + matchHour + "/" + matchComponent
                                        + "/archive/";

                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.mkdirs(new Path(destination));
                                    for (FileStatus f : fs.listStatus(p)) {
                                        fs.rename(f.getPath(), new Path(destination));
                                    }
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info("    Dropping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });

                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.delete(p, true);
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }

                            } else {
                                LOG.info("    Run Filter/Archive job {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                jobRunner.submit(jobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info("  Matches working pattern ({})", p);
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);

                        // Move everything from working/xxx/incoming/ to incoming/
                        PathInfo lockPathInfo = new PathInfo(logdir, rootDir + "/" + dcNumber + "/" + service
                                + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                        lockUtil.acquireWriteLock(lockUtil.getLockPath(lockPathInfo));

                        FileStatus[] fileStatuses = fs.listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                        if (fileStatuses != null) {
                            for (FileStatus fileStatus : fileStatuses) {
                                Path toPath = new Path(
                                        fileStatus.getPath().getParent().getParent().getParent().getParent(),
                                        "incoming/" + fileStatus.getPath().getName());

                                LOG.info("  Moving data from {} to {}", fileStatus.getPath(), toPath);
                                LOG.info("    mkdir {}", toPath);
                                fs.mkdirs(toPath);

                                Path fromDir = new Path(p.toUri().getPath(),
                                        "incoming/" + fileStatus.getPath().getName());
                                LOG.info("    moving from {}", fromDir);
                                FileStatus[] files = fs.listStatus(fromDir);
                                if (files == null || files.length == 0) {
                                    LOG.info("    Nothing to move from  {}", fromDir);
                                } else {
                                    for (FileStatus f : files) {
                                        LOG.info("    rename {} {}", f.getPath(),
                                                new Path(toPath, f.getPath().getName()));
                                        fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                    }
                                }

                                LOG.info("    rm {}", fileStatus.getPath());
                                fs.delete(fileStatus.getPath(), true);
                            }
                            lockUtil.releaseWriteLock(lockUtil.getLockPath(lockPathInfo));

                            fs.delete(new Path(p.toUri().getPath()), true);
                        }
                    }

                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDirectory()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lockUtil.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lockUtil.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }

                }
            }
        }

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        jobRunner.shutdown();
        jobRunnerThread.join();
        LOG.info("Job Stats : Started={} Succeeded={} failed={} errors={}",
                new Object[] { jobRunner.getStarted(), jobRunner.getSucceeded(), jobRunner.getFailed(),
                        jobRunner.getErrors() });

        lockUtil.close();

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java

License:Apache License

@Deprecated
void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {

    for (FileStatus status : fileSystem.listStatus(directory, PathFilters.notHidden())) {

        if (status.isDirectory()) {
            accumulateDatafilePaths(status.getPath(), paths);
        } else {
            paths.add(status.getPath());
        }
    }
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java

License:Apache License

@Override
public List<String> list() {
    List<String> datasets = Lists.newArrayList();
    try {
        FileStatus[] entries = rootFileSystem.listStatus(rootDirectory, PathFilters.notHidden());
        for (FileStatus entry : entries) {
            // assumes that all unhidden directories under the root are data sets
            if (entry.isDirectory() && rootFileSystem.exists(new Path(entry.getPath(), ".metadata"))) {
                // may want to add a check: !RESERVED_NAMES.contains(name)
                datasets.add(entry.getPath().getName());
            } else {
                continue;
            }
        }
    } catch (FileNotFoundException ex) {
        // the repo hasn't created any files yet
        return datasets;
    } catch (IOException ex) {
        throw new MetadataProviderException("Could not list data sets", ex);
    }
    return datasets;
}

From source file:com.cloudera.data.filesystem.FileSystemDataset.java

License:Apache License

private void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {

    for (FileStatus status : fileSystem.listStatus(directory, PathFilters.notHidden())) {

        if (status.isDirectory()) {
            accumulateDatafilePaths(status.getPath(), paths);
        } else {
            paths.add(status.getPath());
        }
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Loads the file block metadata for the given collection of FileDescriptors.  The
 * FileDescriptors are passed as a tree, where the first level is indexed by
 * filesystem, the second level is indexed by partition location, and the leaves are
 * the list of files that exist under each directory.
 */
private void loadBlockMd(Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescs) throws RuntimeException {
    Preconditions.checkNotNull(perFsFileDescs);
    LOG.debug("load block md for " + name_);

    for (FsKey fsEntry : perFsFileDescs.keySet()) {
        FileSystem fs = fsEntry.filesystem;
        // Store all BlockLocations so they can be reused when loading the disk IDs.
        List<BlockLocation> blockLocations = Lists.newArrayList();
        int numCachedBlocks = 0;
        Map<String, List<FileDescriptor>> partitionToFds = perFsFileDescs.get(fsEntry);
        Preconditions.checkNotNull(partitionToFds);
        // loop over all files and record their block metadata, minus volume ids
        for (String partitionDir : partitionToFds.keySet()) {
            Path partDirPath = new Path(partitionDir);
            for (FileDescriptor fileDescriptor : partitionToFds.get(partitionDir)) {
                Path p = new Path(partDirPath, fileDescriptor.getFileName());
                try {
                    FileStatus fileStatus = fs.getFileStatus(p);
                    // fileDescriptors should not contain directories.
                    Preconditions.checkArgument(!fileStatus.isDirectory());
                    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
                    Preconditions.checkNotNull(locations);
                    blockLocations.addAll(Arrays.asList(locations));

                    // Loop over all blocks in the file.
                    for (BlockLocation block : locations) {
                        String[] blockHostPorts;
                        try {
                            blockHostPorts = block.getNames();
                        } catch (IOException e) {
                            // this shouldn't happen, getNames() doesn't throw anything
                            String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
                            LOG.error(errorMsg);
                            throw new IllegalStateException(errorMsg);
                        }
                        // Now enumerate all replicas of the block, adding any unknown hosts to
                        // hostIndex_ and the index for that host to replicaHostIdxs.
                        List<Integer> replicaHostIdxs = new ArrayList<Integer>(blockHostPorts.length);
                        for (int i = 0; i < blockHostPorts.length; ++i) {
                            String[] ip_port = blockHostPorts[i].split(":");
                            Preconditions.checkState(ip_port.length == 2);
                            TNetworkAddress network_address = new TNetworkAddress(ip_port[0],
                                    Integer.parseInt(ip_port[1]));
                            replicaHostIdxs.add(hostIndex_.getIndex(network_address));
                        }
                        fileDescriptor.addFileBlock(
                                new FileBlock(block.getOffset(), block.getLength(), replicaHostIdxs));
                    }
                } catch (IOException e) {
                    throw new RuntimeException(
                            "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e);
                }
            }
        }

        if (SUPPORTS_VOLUME_ID && fs instanceof DistributedFileSystem) {
            LOG.trace("loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() + ". file system: "
                    + fsEntry);
            loadDiskIds((DistributedFileSystem) fs, blockLocations, partitionToFds);
            LOG.trace("completed load of disk ids for: " + getFullName());
        }
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Creates a new HdfsPartition object to be added to the internal partition list.
 * Populates with file format information and file locations. Partitions may be empty,
 * or may not even exist on the file system (a partition's location may have been
 * changed to a new path that is about to be created by an INSERT). For unchanged
 * files (indicated by unchanged mtime), reuses the FileDescriptor from the
 * oldFileDescMap. The one exception is if the partition is marked as cached
 * in which case the block metadata cannot be reused. Otherwise, creates a new
 * FileDescriptor for each modified or new file and adds it to newFileDescMap.
 * Both old and newFileDescMap are Maps of parent directory (partition location)
 * to list of files (FileDescriptors) under that directory.
 * Returns new partition if successful or null if none was added.
 * Separated from addPartition to reduce the number of operations done
 * while holding the lock on the hdfs table.
 *
 *  @throws CatalogException
 *    if the supplied storage descriptor contains metadata that Impala can't
 *    understand.
 */
private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
        org.apache.hadoop.hive.metastore.api.Partition msPartition,
        Map<String, List<FileDescriptor>> oldFileDescMap,
        Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescMap) throws CatalogException {
    HdfsStorageDescriptor fileFormatDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_,
            storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    // If the partition is marked as cached, the block location metadata must be
    // reloaded, even if the file times have not changed.
    boolean isMarkedCached = isMarkedCached_;
    List<LiteralExpr> keyValues = Lists.newArrayList();
    if (msPartition != null) {
        isMarkedCached = HdfsCachingUtil.getCacheDirIdFromParams(msPartition.getParameters()) != null;
        // Load key values
        for (String partitionKey : msPartition.getValues()) {
            Type type = getColumns().get(keyValues.size()).getType();
            // Deal with Hive's special NULL partition key.
            if (partitionKey.equals(nullPartitionKeyValue_)) {
                keyValues.add(NullLiteral.create(type));
            } else {
                try {
                    keyValues.add(LiteralExpr.create(partitionKey, type));
                } catch (Exception ex) {
                    LOG.warn("Failed to create literal expression of type: " + type, ex);
                    throw new CatalogException("Invalid partition key value of type: " + type, ex);
                }
            }
        }
        try {
            Expr.analyze(keyValues, null);
        } catch (AnalysisException e) {
            // should never happen
            throw new IllegalStateException(e);
        }
    }
    try {
        // Each partition could reside on a different filesystem.
        FileSystem fs = partDirPath.getFileSystem(CONF);
        multipleFileSystems_ = multipleFileSystems_
                || !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
        if (fs.exists(partDirPath)) {
            // FileSystem does not have an API that takes in a timestamp and returns a list
            // of files that has been added/changed since. Therefore, we are calling
            // fs.listStatus() to list all the files.
            for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
                String fileName = fileStatus.getPath().getName().toString();
                if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
                        || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
                    // Ignore directory, hidden file starting with . or _, and LZO index files
                    // If a directory is erroneously created as a subdirectory of a partition dir
                    // we should ignore it and move on. Hive will not recurse into directories.
                    // Skip index files, these are read by the LZO scanner directly.
                    continue;
                }

                String partitionDir = fileStatus.getPath().getParent().toString();
                FileDescriptor fd = null;
                // Search for a FileDescriptor with the same partition dir and file name. If one
                // is found, it will be chosen as a candidate to reuse.
                if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
                    for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                        if (oldFileDesc.getFileName().equals(fileName)) {
                            fd = oldFileDesc;
                            break;
                        }
                    }
                }

                // Check if this FileDescriptor has been modified since last loading its block
                // location information. If it has not been changed, the previously loaded
                // value can be reused.
                if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
                        || fd.getModificationTime() != fileStatus.getModificationTime()) {
                    // Create a new file descriptor, the block metadata will be populated by
                    // loadBlockMd.
                    fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
                    addPerFsFileDesc(perFsFileDescMap, fs, partitionDir, fd);
                }

                List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
                if (fds == null) {
                    fds = Lists.newArrayList();
                    fileDescMap_.put(partitionDir, fds);
                }
                fds.add(fd);

                // Add to the list of FileDescriptors for this partition.
                fileDescriptors.add(fd);
            }
            numHdfsFiles_ += fileDescriptors.size();
        }
        HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues, fileFormatDescriptor,
                fileDescriptors, getAvailableAccessLevel(fs, partDirPath));
        partition.checkWellFormed();
        return partition;
    } catch (Exception e) {
        throw new CatalogException("Failed to create partition: ", e);
    }
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Moves all visible (non-hidden) files from a source directory to a destination
 * directory. Any sub-directories within the source directory are skipped.
 * Returns the number of files moved as part of this operation.
 */
public static int moveAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    FileSystem fs = destDir.getFileSystem(CONF);
    Preconditions.checkState(fs.isDirectory(destDir));
    Preconditions.checkState(fs.isDirectory(sourceDir));

    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();

    // Enumerate all the files in the source
    int numFilesMoved = 0;
    for (FileStatus fStatus : fs.listStatus(sourceDir)) {
        if (fStatus.isDirectory()) {
            LOG.debug("Skipping copy of directory: " + fStatus.getPath());
            continue;
        } else if (isHiddenFile(fStatus.getPath().getName())) {
            continue;
        }

        Path destFile = new Path(destDir, fStatus.getPath().getName());
        if (fs.exists(destFile)) {
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.moveFile(fStatus.getPath(), destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Returns true if the given Path contains any subdirectories, otherwise false.
 */
public static boolean containsSubdirectory(Path directory) throws FileNotFoundException, IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    // Enumerate all the entries in the directory
    for (FileStatus fStatus : fs.listStatus(directory)) {
        if (fStatus.isDirectory()) {
            return true;
        }
    }
    return false;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Gets a FileDescriptor for the given fileStatus, consulting oldFileDescMap. Returns
 * null if the file is a directory, a hidden file starting with . or _, or an LZO index
 * file. If the file is found in the old file descriptor map, has not been modified, and
 * the partition is not marked as cached ('isMarkedCached'), the cached descriptor is
 * reused. Otherwise a new FileDescriptor is created with the file name, length and
 * modification time.
 *
 * Must be thread safe. Access to 'oldFileDescMap', 'perFsFileBlocks' and 'hostIndex'
 * must be protected.
 */
private static FileDescriptor getFileDescriptor(FileSystem fs, FileStatus fileStatus, HdfsFileFormat fileFormat,
        Map<String, List<FileDescriptor>> oldFileDescMap, boolean isMarkedCached,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName, ListMap<TNetworkAddress> hostIndex) {
    String fileName = fileStatus.getPath().getName().toString();

    if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
            || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
        // Ignore directory, hidden file starting with . or _, and LZO index files
        // If a directory is erroneously created as a subdirectory of a partition dir
        // we should ignore it and move on. Hive will not recurse into directories.
        // Skip index files, these are read by the LZO scanner directly.
        return null;
    }

    String partitionDir = fileStatus.getPath().getParent().toString();
    FileDescriptor fd = null;
    // Search for a FileDescriptor with the same partition dir and file name. If one
    // is found, it will be chosen as a candidate to reuse.
    if (oldFileDescMap != null) {
        synchronized (oldFileDescMap) {
            if (oldFileDescMap.get(partitionDir) != null) {
                for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                    // TODO: This doesn't seem like the right data structure if a directory has
                    // a lot of files.
                    if (oldFileDesc.getFileName().equals(fileName)) {
                        fd = oldFileDesc;
                        break;
                    }
                }
            }
        }
    }

    // Check if this FileDescriptor has been modified since last loading its block
    // location information. If it has not been changed, the previously loaded value can
    // be reused.
    if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
            || fd.getModificationTime() != fileStatus.getModificationTime()) {
        // Create a new file descriptor and load the file block metadata,
        // collecting the block metadata into perFsFileBlocks.  The disk IDs for
        // all the blocks of each filesystem will be loaded by loadDiskIds().
        fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
        loadBlockMetadata(fs, fileStatus, fd, fileFormat, perFsFileBlocks, tblName, hostIndex);
    }

    return fd;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Queries the filesystem to load the file block metadata (e.g. DFS blocks) for the
 * given file. Adds the newly created block metadata and block location to the
 * perFsFileBlocks, so that the storage IDs for each block can be retrieved from
 * BlockLocation.
 *
 * Must be threadsafe. Access to 'perFsFileBlocks' and 'hostIndex' must be protected.
 */
private static void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
        HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName,
        ListMap<TNetworkAddress> hostIndex) {
    Preconditions.checkNotNull(fd);
    Preconditions.checkNotNull(perFsFileBlocks);
    Preconditions.checkArgument(!file.isDirectory());
    LOG.debug("load block md for " + tblName + " file " + fd.getFileName());

    if (!FileSystemUtil.hasGetFileBlockLocations(fs)) {
        synthesizeBlockMetadata(file, fd, fileFormat, hostIndex);
        return;
    }
    try {
        BlockLocation[] locations = null;
        if (file instanceof LocatedFileStatus) {
            locations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
            locations = fs.getFileBlockLocations(file, 0, file.getLen());
        }
        Preconditions.checkNotNull(locations);

        // Loop over all blocks in the file.
        for (BlockLocation loc : locations) {
            Preconditions.checkNotNull(loc);
            fd.addFileBlock(createFileBlock(loc, hostIndex));
        }

        // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the
        // blocks are collected, the disk IDs will be queried in one batch per filesystem.
        FsKey fsKey = new FsKey(fs);
        synchronized (perFsFileBlocks) {
            FileBlocksInfo infos = perFsFileBlocks.get(fsKey);
            if (infos == null) {
                infos = new FileBlocksInfo();
                perFsFileBlocks.put(fsKey, infos);
            }
            infos.addBlocks(fd.getFileBlocks(), Arrays.asList(locations));
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "couldn't determine block locations for path '" + file.getPath() + "':\n" + e.getMessage(), e);
    }
}