List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
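Before the per-project examples, a minimal sketch of the method's behavior: Path.toString() renders the path's URI as a string, keeping the scheme and authority when the path is fully qualified. The host and directory names below are illustrative only:

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path prints exactly what was passed in.
        Path relative = new Path("logs/2024/01");
        System.out.println(relative.toString());   // logs/2024/01

        // A fully qualified path keeps its scheme and authority.
        Path qualified = new Path("hdfs://namenode:8020/service/logs");
        System.out.println(qualified.toString());  // hdfs://namenode:8020/service/logs

        // Parent/child construction normalizes separators before printing.
        Path child = new Path(qualified, "2024/01");
        System.out.println(child.toString());      // hdfs://namenode:8020/service/logs/2024/01
    }
}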
From source file:com.blackberry.logdriver.admin.LogMaintenance.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    // For some reason, Oozie needs some options to be set in system instead of
    // in the configuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMR = getConfOrEnv(conf, "MAX_CONCURRENT_MR", "-1");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs", "true"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");
    String maxTotalMR = getConfOrEnv(conf, "MAX_TOTAL_MR", "-1");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set. Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set. Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set. Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set. Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative. Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set. Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative. Not deleting.");
        doDelete = false;
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set. Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set. Using default value of 'service'.");
        rootDir = "/service";
    }

    // We can hang if this fails. So make sure we abort if it fails.
    fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.exists(new Path("/")); // Test if it works.
    } catch (IOException e) {
        LOG.error("Error getting filesystem.", e);
        return 1;
    }

    // Create the LockUtil instance
    lockUtil = new LockUtil(zkConnectString);

    // Now it's safe to create our Job Runner
    JobRunner jobRunner = new JobRunner(Integer.parseInt(maxConcurrentMR), Integer.parseInt(maxTotalMR));
    Thread jobRunnerThread = new Thread(jobRunner);
    jobRunnerThread.setName("JobRunner");
    jobRunnerThread.setDaemon(false);
    jobRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them all.
    try {
        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir)
                + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth first search of the directory, processing anything that
        // looks interesting along the way
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug(" Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug(" Skipping, since it's not old enough.");
            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info(" Deleting empty directory {}", p.toString());
                fs.delete(p, true);
            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }
                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }
                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {
                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug(" Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug(" No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info(" Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties jobProps = new Properties();
                            jobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            jobProps.setProperty("jobType", "merge");
                            jobProps.setProperty("rootDir", rootDir);
                            jobProps.setProperty("dcNumber", dcNumber);
                            jobProps.setProperty("service", service);
                            jobProps.setProperty("date", matchDate);
                            jobProps.setProperty("hour", matchHour);
                            jobProps.setProperty("component", matchComponent);
                            jobProps.setProperty("user.name", userName);
                            jobProps.setProperty("logdir", logdir);

                            jobRunner.submit(jobProps);
                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {
                        Properties jobProps = new Properties();
                        jobProps.load(new FileInputStream(filterJobPropertiesFile));

                        jobProps.setProperty("jobType", "filter");
                        jobProps.setProperty("rootDir", rootDir);
                        jobProps.setProperty("dcNumber", dcNumber);
                        jobProps.setProperty("service", service);
                        jobProps.setProperty("date", matchDate);
                        jobProps.setProperty("hour", matchHour);
                        jobProps.setProperty("component", matchComponent);
                        jobProps.setProperty("user.name", userName);
                        jobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = jobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(appPath + "/"
                            + conf.get("filter.definition.file", service + ".yaml"));
                        LOG.info("Filter file is {}", filterFile);

                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(" Got null when getting filters. Not processing. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn(" Got no filters. Not processing. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info(" Keeping everything. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });

                                // Move files from data to archive
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                    + "/" + matchDate + "/" + matchHour + "/" + matchComponent + "/archive/";

                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.mkdirs(new Path(destination));
                                    for (FileStatus f : fs.listStatus(p)) {
                                        fs.rename(f.getPath(), new Path(destination));
                                    }
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info(" Dropping everything. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });

                                // delete it all!
                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.delete(p, true);
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }
                            } else {
                                LOG.info(" Run Filter/Archive job {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                                jobRunner.submit(jobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info(" Matches working pattern ({})", p);
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);

                        // Move everything from working/xxx/incoming/ to incoming/
                        PathInfo lockPathInfo = new PathInfo(logdir, rootDir + "/" + dcNumber + "/" + service
                            + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                        lockUtil.acquireWriteLock(lockUtil.getLockPath(lockPathInfo));

                        FileStatus[] fileStatuses = fs.listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                        if (fileStatuses != null) {
                            for (FileStatus fileStatus : fileStatuses) {
                                Path toPath = new Path(
                                    fileStatus.getPath().getParent().getParent().getParent().getParent(),
                                    "incoming/" + fileStatus.getPath().getName());

                                LOG.info(" Moving data from {} to {}", fileStatus.getPath(), toPath);
                                LOG.info(" mkdir {}", toPath);
                                fs.mkdirs(toPath);

                                Path fromDir = new Path(p.toUri().getPath(),
                                    "incoming/" + fileStatus.getPath().getName());
                                LOG.info(" moving from {}", fromDir);
                                FileStatus[] files = fs.listStatus(fromDir);
                                if (files == null || files.length == 0) {
                                    LOG.info(" Nothing to move from {}", fromDir);
                                } else {
                                    for (FileStatus f : files) {
                                        LOG.info(" rename {} {}", f.getPath(),
                                            new Path(toPath, f.getPath().getName()));
                                        fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                    }
                                }

                                LOG.info(" rm {}", fileStatus.getPath());
                                fs.delete(fileStatus.getPath(), true);
                            }

                            lockUtil.releaseWriteLock(lockUtil.getLockPath(lockPathInfo));
                            fs.delete(new Path(p.toUri().getPath()), true);
                        }
                    }
                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDirectory()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lockUtil.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lockUtil.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. Just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }
                }
            }
        }

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        jobRunner.shutdown();
        jobRunnerThread.join();
        LOG.info("Job Stats : Started={} Succeeded={} failed={} errors={}", new Object[] {
            jobRunner.getStarted(), jobRunner.getSucceeded(), jobRunner.getFailed(), jobRunner.getErrors() });

        lockUtil.close();

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}
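A note on the cutoff comparison used above: both the directory timestamps and the cutoffs are zero-padded yyyyMMddHH strings, so a plain lexicographic String.compareTo() orders them chronologically. A minimal sketch of that invariant (the values are illustrative):

import java.util.Calendar;

public class CutoffDemo {
    public static void main(String[] args) {
        // Build a cutoff the same way LogMaintenance does: subtract N days,
        // then format as a zero-padded yyyyMMddHH string.
        int daysBeforeDelete = 30; // illustrative value
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, -daysBeforeDelete);
        String cutoff = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            cal.get(Calendar.MONTH) + 1, cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));

        // Zero padding makes lexicographic order match chronological order.
        String timestamp = "2013010508"; // a directory's date + hour, illustrative
        System.out.println(timestamp.compareTo(cutoff) < 0
            ? "older than cutoff - would be deleted"
            : "within retention - kept");
    }
}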
From source file:com.blackberry.logdriver.util.IndexLogs.java
License:Apache License
private static void updateComponent(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        List<String> unmergedCSVStrings, FileSystem fs, FileStatus matchedFolder, Path path)
        throws IOException, ParseException {
    // Parse path by splitting it across slashes. To determine service (which might contain slashes) grab
    // everything after the DC name, but before the matched date string.
    String[] pathPieces = matchedFolder.getPath().toString().split("/");
    String[] servicePieces = path.toString().split(pathPieces[4] + "/");
    servicePieces = servicePieces[1].split("/" + pathPieces[pathPieces.length - 5]);

    String DC = pathPieces[4];
    String service = servicePieces[0];
    String component = pathPieces[pathPieces.length - 2];
    String type = pathPieces[pathPieces.length - 5];
    String status = pathPieces[pathPieces.length - 1];
    Date date = inputFormat.parse(pathPieces[pathPieces.length - 4]);

    // If the _READY file doesn't exist, add it to the list
    Path READYPath = new Path(path.toString() + "/_READY");
    // System.out.println("Checking for " + READYPath.toString());
    if (!fs.exists(READYPath)) {
        unmergedCSVStrings.add(DC + "," + service + "," + type + "," + component + ","
            + pathPieces[pathPieces.length - 4] + "," + pathPieces[pathPieces.length - 3] + "\n");
        // System.out.println(unmergedCSVString);
    }

    // Check if there is a matching component, create one if not.
    if (!componentExists(data, DC, service, type, component)) {
        data.get(DC).get(service).get(type).put(component,
            new Component(DC, service, type, component, date));
    }
    Component thisComponent = data.get(DC).get(service).get(type).get(component);

    // Update the start or end date if the current date is before or after, respectively.
    if (date.before(thisComponent.startDate)) {
        thisComponent.startDate = date;
    } else if (date.after(thisComponent.endDate)) {
        thisComponent.endDate = date;
    }

    // Is the current folder an archive? If so and date is later than the current archiveDate, update it.
    if (status.matches("archive") && date.after(thisComponent.archiveDate)) {
        thisComponent.archiveDate = date;
    }

    // Add size data
    if (status.matches("data")) {
        thisComponent.addDataSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("incoming")) {
        thisComponent.addIncomingSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("archive")) {
        thisComponent.addArchiveSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job
 *          The job to modify
 * @param outputDir
 *          the {@link Path} of the output directory for the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
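A quick sketch of why the makeQualified(...) call above matters before persisting outputDir.toString(): an unqualified Path omits the scheme and authority, while a qualified one round-trips unambiguously through a string-valued configuration entry. This is a minimal local-filesystem demo; the printed qualified form depends on the working directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class QualifyDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // defaults to the local file system

        Path p = new Path("output/part-00000");
        System.out.println(p.toString()); // output/part-00000  (relative, no scheme)

        // Qualify against the path's file system, as setOutputPath() does above.
        Path qualified = p.getFileSystem(conf).makeQualified(p);
        System.out.println(qualified.toString()); // e.g. file:/home/user/output/part-00000
    }
}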
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);

    Path path = new Path(srcDir);
    if (!hdfsFs.exists(path)) {
        System.out.println("The path does not exist - cannot continue : " + path.toString());
        return;
    }

    FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && name.endsWith(".avro");
        }
    });

    for (FileStatus fs : statuses) {
        try {
            Path inPath = fs.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));
            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
/**
 * @param permission Currently ignored.
 */
@Override
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(file));
    if (inode != null) {
        if (overwrite) {
            delete(file, true);
        } else {
            throw new FileAlreadyExistsException("File already exists: " + file);
        }
    } else {
        Path parent = file.getParent();
        if (parent != null) {
            if (!mkdirs(parent)) {
                throw new IOException("Mkdirs failed to create " + parent.toString());
            }
        }
    }
    return new FSDataOutputStream(new RadosHDFSOutputStream(store, makeAbsolute(file).toString()));
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
    return new FSDataInputStream(new RadosHDFSInputStream(store, path.toString()));
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public boolean rename(Path src, Path dst) throws IOException {
    Path absoluteSrc = makeAbsolute(src);
    final String debugPreamble = "Renaming '" + src + "' to '" + dst + "' - ";
    INode srcINode = store.retrieveINode(absoluteSrc);
    boolean debugEnabled = LOG.isDebugEnabled();
    if (srcINode == null) {
        // src path doesn't exist
        if (debugEnabled) {
            LOG.debug(debugPreamble + "returning false as src does not exist");
        }
        return false;
    }

    Path absoluteDst = makeAbsolute(dst);

    // validate the parent dir of the destination
    Path dstParent = absoluteDst.getParent();
    if (dstParent != null) {
        // if the dst parent is not root, make sure it exists
        INode dstParentINode = store.retrieveINode(dstParent);
        if (dstParentINode == null) {
            // dst parent doesn't exist
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent does not exist");
            }
            return false;
        }
        if (dstParentINode.isFile()) {
            // dst parent exists but is a file
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent exists and is a file");
            }
            return false;
        }
    }

    // get status of source
    boolean srcIsFile = srcINode.isFile();

    INode dstINode = store.retrieveINode(absoluteDst);
    boolean destExists = dstINode != null;
    boolean destIsDir = destExists && !dstINode.isFile();
    if (srcIsFile) {
        // source is a simple file
        if (destExists) {
            if (destIsDir) {
                // outcome #1: dest exists and is dir - rename to subdir of dest
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src file under dest dir to " + absoluteDst);
                }
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
            } else {
                // outcome #2: dest is a file - fail iff different from src
                boolean renamingOnToSelf = absoluteSrc.equals(absoluteDst);
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying file onto file, outcome is " + renamingOnToSelf);
                }
                return renamingOnToSelf;
            }
        } else {
            // outcome #3: dest does not exist - use dest as path for rename
            if (debugEnabled) {
                LOG.debug(debugPreamble + "copying file onto file");
            }
        }
    } else {
        // here the source exists and is a directory
        // outcomes (given we know the parent dir exists if we get this far):
        // #1 destination is a file: fail
        // #2 destination is a directory: create a new dir under that one
        // #3 destination doesn't exist: create a new dir with that name
        // #2 and #3 are only allowed if the dest path is not == or under src
        if (destExists) {
            if (!destIsDir) {
                // #1 destination is a file: fail
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "returning false as src is a directory, but not dest");
                }
                return false;
            } else {
                // the destination dir exists
                // destination for rename becomes a subdir of the target name
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src dir under dest dir to " + absoluteDst);
                }
            }
        }
        // the final destination directory is now known, so validate it for illegal moves
        if (absoluteSrc.equals(absoluteDst)) {
            // you can't rename a directory onto itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "Dest==source && isDir - failing");
            }
            return false;
        }
        if (absoluteDst.toString().startsWith(absoluteSrc.toString() + "/")) {
            // you can't move a directory under itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "dst is equal to or under src dir - failing");
            }
            return false;
        }
    }
    // here the dest path is set up - so rename
    return renameRecursive(absoluteSrc, absoluteDst);
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    Path absolutePath = makeAbsolute(path);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
        return false;
    }
    if (inode.isFile()) {
        store.deleteINode(absolutePath);
        for (Block block : inode.getBlocks()) {
            store.deleteBlock(block);
        }
    } else {
        FileStatus[] contents = null;
        try {
            contents = listStatus(absolutePath);
        } catch (FileNotFoundException fnfe) {
            return false;
        }

        if ((contents.length != 0) && (!recursive)) {
            throw new IOException("Directory " + path.toString() + " is not empty.");
        }
        for (FileStatus p : contents) {
            if (!delete(p.getPath(), recursive)) {
                return false;
            }
        }
        store.deleteINode(absolutePath);
    }
    return true;
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java
License:Apache License
/**
 * Run TopK FPGrowth given the input file.
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("minSupport", "s",
        "(Optional) The minimum number of times a co-occurrence must be present. Default Value: 3", "3");
    addOption("maxHeapSize", "k",
        "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items. Default value: 50",
        "50");
    addOption("numGroups", "g",
        "(Optional) Number of groups the features should be divided in the map-reduce version."
            + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT,
        Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
    addOption("splitterPattern", "regex",
        "Regular Expression pattern used to split given string transaction into itemsets."
            + " Default value splits comma separated itemsets. Default Value:"
            + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ",
        "[ ,\t]*[,|\t][ ,\t]*");
    addOption("numTreeCacheEntries", "tc",
        "(Optional) Number of entries in the tree cache to prevent duplicate tree building."
            + " (Warning) a first level conditional FP-Tree might consume a lot of memory, "
            + "so keep this value small, but big enough to prevent duplicate tree building. "
            + "Default Value:5 Recommended Values: [5-10]",
        "5");
    addOption("method", "method", "Method of processing: sequential|mapreduce", "sequential");
    addOption("encoding", "e", "(Optional) The file encoding. Default value: UTF-8", "UTF-8");
    addFlag("useFPG2", "2", "Use an alternate FPG implementation");

    if (parseArguments(args) == null) {
        return -1;
    }

    Parameters params = new Parameters();

    if (hasOption("minSupport")) {
        String minSupportString = getOption("minSupport");
        params.set("minSupport", minSupportString);
    }
    if (hasOption("maxHeapSize")) {
        String maxHeapSizeString = getOption("maxHeapSize");
        params.set("maxHeapSize", maxHeapSizeString);
    }
    if (hasOption("numGroups")) {
        String numGroupsString = getOption("numGroups");
        params.set("numGroups", numGroupsString);
    }
    if (hasOption("numTreeCacheEntries")) {
        String numTreeCacheString = getOption("numTreeCacheEntries");
        params.set("treeCacheSize", numTreeCacheString);
    }
    if (hasOption("splitterPattern")) {
        String patternString = getOption("splitterPattern");
        params.set("splitPattern", patternString);
    }

    String encoding = "UTF-8";
    if (hasOption("encoding")) {
        encoding = getOption("encoding");
    }
    params.set("encoding", encoding);

    if (hasOption("useFPG2")) {
        params.set(PFPGrowth.USE_FPG2, "true");
    }

    Path inputDir = getInputPath();
    Path outputDir = getOutputPath();

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    String classificationMethod = getOption("method");
    if ("sequential".equalsIgnoreCase(classificationMethod)) {
        runFPGrowth(params);
    } else if ("mapreduce".equalsIgnoreCase(classificationMethod)) {
        Configuration conf = new Configuration();
        HadoopUtil.delete(conf, outputDir);
        PFPGrowth.runPFPGrowth(params);
    }

    return 0;
}
From source file:com.cg.mapreduce.myfpgrowth.FPGrowthDriver.java
License:Apache License
/**
 * Run TopK FPGrowth given the input file.
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("minSupport", "s",
        "(Optional) The minimum number of times a co-occurrence must be present. Default Value: 3", "3");
    addOption("maxHeapSize", "k",
        "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items. Default value: 50",
        "50");
    addOption("numGroups", "g",
        "(Optional) Number of groups the features should be divided in the map-reduce version."
            + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT,
        Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
    addOption("splitterPattern", "regex",
        "Regular Expression pattern used to split given string transaction into itemsets."
            + " Default value splits comma separated itemsets. Default Value:"
            + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ",
        "[ ,\t]*[,|\t][ ,\t]*");
    addOption("numTreeCacheEntries", "tc",
        "(Optional) Number of entries in the tree cache to prevent duplicate tree building."
            + " (Warning) a first level conditional FP-Tree might consume a lot of memory, "
            + "so keep this value small, but big enough to prevent duplicate tree building. "
            + "Default Value:5 Recommended Values: [5-10]",
        "5");
    addOption("method", "method", "Method of processing: sequential|mapreduce", "sequential");
    addOption("encoding", "e", "(Optional) The file encoding. Default value: UTF-8", "UTF-8");
    addFlag("useFPG2", "2", "Use an alternate FPG implementation");

    if (parseArguments(args) == null) {
        return -1;
    }

    Parameters params = new Parameters();

    if (hasOption("minSupport")) {
        String minSupportString = getOption("minSupport");
        params.set("minSupport", minSupportString);
    }
    if (hasOption("maxHeapSize")) {
        String maxHeapSizeString = getOption("maxHeapSize");
        params.set("maxHeapSize", maxHeapSizeString);
    }
    if (hasOption("numGroups")) {
        String numGroupsString = getOption("numGroups");
        params.set("numGroups", numGroupsString);
    }
    if (hasOption("numTreeCacheEntries")) {
        String numTreeCacheString = getOption("numTreeCacheEntries");
        params.set("treeCacheSize", numTreeCacheString);
    }
    if (hasOption("splitterPattern")) {
        String patternString = getOption("splitterPattern");
        params.set("splitPattern", patternString);
    }

    String encoding = "UTF-8";
    if (hasOption("encoding")) {
        encoding = getOption("encoding");
    }
    params.set("encoding", encoding);

    if (hasOption("useFPG2")) {
        params.set(PFPGrowth.USE_FPG2, "true");
    }

    Path inputDir = getInputPath();
    Path outputDir = getOutputPath();

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, outputDir);
    PFPGrowth.runPFPGrowth(params);

    return 0;
}