Example usage for org.apache.hadoop.fs Path equals

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.equals.

Prototype

@Override
    public boolean equals(Object o)

Source Link

Usage

From source file:com.lightboxtechnologies.spectrum.BlockHashMapper.java

License:Apache License

void openImgFile(Path p, FileSystem fs) throws IOException {
    if (ImgFile != null && p.equals(ImgPath)) {
        return;/*w  w  w  .j  a v  a 2  s .  c  o m*/
    }
    IOUtils.closeQuietly(ImgFile);
    ImgPath = p;
    ImgFile = fs.open(p, 512 * 1024);
}

From source file:com.lightboxtechnologies.spectrum.ExtractDataMapper.java

License:Apache License

void openImgFile(Path p, FileSystem fs) throws IOException {
    if (ImgFile != null && p.equals(ImgPath)) {
        return;/*from  www.  ja  v a 2 s . c om*/
    }
    IOUtils.closeQuietly(ImgFile);
    ImgPath = p;
    ImgFile = fs.open(p, 64 * 1024 * 1024);
}

From source file:com.rim.logdriver.admin.LogMaintenance.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }//from  w  w w  . ja v  a2  s  .  c  o  m

    // For some reason, Oozie needs some options to be set in system instead of
    // in the confiuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    oozieUrl = getConfOrEnv(conf, "OOZIE_URL");
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMergeJobs = getConfOrEnv(conf, "MAX_CONCURRENT_MERGE_JOBS");
    String maxConcurrentFilterJobs = getConfOrEnv(conf, "MAX_CONCURRENT_FILTER_JOBS");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (oozieUrl == null) {
        LOG.info("OOZIE_URL is not set.  Not merging or archiving.");
        doMerge = false;
        doArchive = false;
    }
    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set.  Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set.  Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set.  Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set.  Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative.  Not deleting.");
        doDelete = false;
    }
    if (maxConcurrentMergeJobs == null) {
        LOG.info("MAX_CONCURRENT_MERGE_JOBS is not set.  Using default value of -1.");
        maxConcurrentMergeJobs = "-1";
    }
    if (maxConcurrentFilterJobs == null) {
        LOG.info("MAX_CONCURRENT_FILTER_JOBS is not set.  Using default value of -1.");
        maxConcurrentMergeJobs = "-1";
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set.  Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set.  Using default value of 'service'.");
        rootDir = "/service";
    }

    // Now it's safe to create our Oozie Runners.
    OozieRunner mergeOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentMergeJobs));
    Thread mergeOozieRunnerThread = new Thread(mergeOozieRunner);
    mergeOozieRunnerThread.setName("OozieRunner - Merge");
    mergeOozieRunnerThread.setDaemon(false);
    mergeOozieRunnerThread.start();

    OozieRunner filterOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentFilterJobs));
    Thread filterOozieRunnerThread = new Thread(filterOozieRunner);
    filterOozieRunnerThread.setName("OozieRunner - Filter");
    filterOozieRunnerThread.setDaemon(false);
    filterOozieRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them
    // all.
    try {
        // We can hang if this fails. So make sure we abort if it fails.
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
            fs.exists(new Path("/")); // Test if it works.
        } catch (IOException e) {
            LOG.error("Error getting filesystem.", e);
            return 1;
        }
        // We'll need an Oozie client to check on orphaned directories.
        oozieClient = getOozieClient();

        // LockUtils are used in a couple of places
        LockUtil lu = new LockUtil(zkConnectString);

        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth first search of the directory, processing anything that
        // looks
        // interesting along the way
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug("    Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug("    Skipping, since it's not old enough.");

            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info("    Deleting empty directory {}", p.toString());
                fs.delete(p, true);

            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {

                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug("    Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug("    No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info("    Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                    dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties oozieJobProps = new Properties();
                            oozieJobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            oozieJobProps.setProperty("rootDir", rootDir);
                            oozieJobProps.setProperty("dcNumber", dcNumber);
                            oozieJobProps.setProperty("service", service);
                            oozieJobProps.setProperty("date", matchDate);
                            oozieJobProps.setProperty("hour", matchHour);
                            oozieJobProps.setProperty("component", matchComponent);
                            oozieJobProps.setProperty("user.name", userName);
                            oozieJobProps.setProperty("logdir", logdir);

                            mergeOozieRunner.submit(oozieJobProps);

                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {

                        Properties oozieJobProps = new Properties();
                        oozieJobProps.load(new FileInputStream(filterJobPropertiesFile));

                        oozieJobProps.setProperty("rootDir", rootDir);
                        oozieJobProps.setProperty("dcNumber", dcNumber);
                        oozieJobProps.setProperty("service", service);
                        oozieJobProps.setProperty("date", matchDate);
                        oozieJobProps.setProperty("hour", matchHour);
                        oozieJobProps.setProperty("component", matchComponent);
                        oozieJobProps.setProperty("user.name", userName);
                        oozieJobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = oozieJobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(appPath + "/" + service + ".yaml");
                        LOG.info("Filter file is {}", filterFile);
                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                    fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(
                                        "    Got null when getting filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn("    Got no filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info("    Keeping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // Move files from data to archive
                                // delete it all!
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                        + "/" + matchDate + "/" + matchHour + "/" + matchComponent
                                        + "/archive/";

                                String[] moveArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "move " + p.toUri().getPath() + " " + destination };
                                ToolRunner.run(new Configuration(), new LockedFs(), moveArgs);
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info("    Dropping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // delete it all!
                                String[] delArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "delete " + p.toUri().getPath() };
                                ToolRunner.run(new Configuration(), new LockedFs(), delArgs);
                            } else {
                                LOG.info("    Run Filter/Archive job {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                filterOozieRunner.submit(oozieJobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info("  Matches working pattern");
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);
                        String matchOozieJobId = matcher.group(4);

                        // Check to see what's up with the oozie job. If it's still
                        // running,
                        // we don't want to touch it.
                        Status status = null;
                        try {
                            WorkflowJob jobInfo = oozieClient.getJobInfo(matchOozieJobId);
                            status = jobInfo.getStatus();
                        } catch (OozieClientException e) {
                            if (e.getMessage() != null && e.getMessage().contains("Job does not exist")) {
                                LOG.info("Oozie job not found.  Proceeding as though job was failed.", e);
                                status = Status.FAILED;
                            } else {
                                LOG.error("Oozie client error.  Not Proceeding.", e);
                            }
                        }
                        LOG.info("  Oozie job status is {}", status);
                        if (status != null && status != Status.RUNNING && status != Status.PREP
                                && status != Status.SUSPENDED) {
                            // Move everything from working/xxx/incoming/ to incoming/
                            PathInfo lockPathInfo = new PathInfo(rootDir + "/" + dcNumber + "/" + service + "/"
                                    + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                            lu.acquireWriteLock(lu.getLockPath(lockPathInfo));

                            FileStatus[] fileStatuses = fs
                                    .listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                            if (fileStatuses != null) {
                                for (FileStatus fileStatus : fileStatuses) {
                                    Path toPath = new Path(fileStatus.getPath().getParent().getParent()
                                            .getParent().getParent(),
                                            "incoming/" + fileStatus.getPath().getName());

                                    LOG.info("  Moving data from {} to {}", fileStatus.getPath(), toPath);
                                    LOG.info("    mkdir {}", toPath);
                                    fs.mkdirs(toPath);

                                    Path fromDir = new Path(p.toUri().getPath(),
                                            "incoming/" + fileStatus.getPath().getName());
                                    LOG.info("    moving from {}", fromDir);
                                    FileStatus[] files = fs.listStatus(fromDir);
                                    if (files == null || files.length == 0) {
                                        LOG.info("    Nothing to move from  {}", fromDir);
                                    } else {
                                        for (FileStatus f : files) {
                                            LOG.info("    rename {} {}", f.getPath(),
                                                    new Path(toPath, f.getPath().getName()));
                                            fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                        }
                                    }

                                    LOG.info("    rm {}", fileStatus.getPath().getParent().getParent());
                                    fs.delete(fileStatus.getPath().getParent().getParent(), true);
                                }

                                lu.releaseWriteLock(lu.getLockPath(lockPathInfo));

                            }
                        }
                    }

                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDir()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks
        // from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lu.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lu.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }

                }
            }
        }
        lu.close();

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        mergeOozieRunner.shutdown();
        mergeOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Merge  : Started={} Succeeded={} failed={} errors={}",
                new Object[] { mergeOozieRunner.getStarted(), mergeOozieRunner.getSucceeded(),
                        mergeOozieRunner.getFailed(), mergeOozieRunner.getErrors() });

        filterOozieRunner.shutdown();
        filterOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Filter : Started={} Succeeded={} failed={} errors={}",
                new Object[] { filterOozieRunner.getStarted(), filterOozieRunner.getSucceeded(),
                        filterOozieRunner.getFailed(), filterOozieRunner.getErrors() });

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}

From source file:com.splicemachine.storage.HNIOFileSystem.java

License:Apache License

/**
 * Reports whether two paths denote the same file.
 * <p>
 * The decision is delegated entirely to {@code path.equals(path2)}; the
 * file system itself is not consulted.
 *
 * @param path  the first path
 * @param path2 the path to compare against
 * @return {@code true} when {@code path} equals {@code path2}
 * @throws IOException declared by the overridden signature; nothing in this
 *                     body throws it
 */
@Override
public boolean isSameFile(Path path, Path path2) throws IOException {
    final boolean samePath = path.equals(path2);
    return samePath;
}

From source file:com.splunk.shuttl.testutil.HadoopFileSystemPutterTest.java

License:Apache License

/**
 * Two instances of different classes must report different storage paths.
 */
@Test(groups = { "fast-unit" })
public void pathWhereAClassesFilesAreStored_should_differForDifferentClasses() {
    ClassA first = new ClassA();
    ClassB second = new ClassB();

    // Precondition: the two fixtures really are of different classes.
    String firstClassName = first.getClass().getName();
    String secondClassName = second.getClass().getName();
    assertTrue(!firstClassName.equals(secondClassName));

    // Actual check: the storage paths differ as well.
    Path firstStoragePath = first.getPathWhereFilesAreStored();
    Path secondStoragePath = second.getPathWhereFilesAreStored();
    boolean sameStoragePath = firstStoragePath.equals(secondStoragePath);
    assertTrue(!sameStoragePath);
}

From source file:com.splunk.shuttl.testutil.HadoopFileSystemPutterTest.java

License:Apache License

/**
 * Two distinct local files must be mapped to different paths by the putter.
 */
@Test(groups = { "fast-unit" })
public void path_where_localFileIsPut_should_differForDifferentFiles() {
    File firstFile = createFile();
    File secondFile = createFile();

    // Precondition: the two local files live at different absolute paths.
    String firstLocation = firstFile.getAbsolutePath();
    String secondLocation = secondFile.getAbsolutePath();
    assertTrue(!firstLocation.equals(secondLocation));

    // Actual check: the putter assigns them different target paths.
    Path firstTarget = putter.getPathForFile(firstFile);
    Path secondTarget = putter.getPathForFile(secondFile);
    boolean sameTarget = firstTarget.equals(secondTarget);
    assertTrue(!sameTarget);
}

From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java

License:Apache License

/**
 * Go through each original inputsplit, get its file path, and check the
 *  index file,/* w w  w.  j  a  va 2s .c om*/
 * a)  keep it, when there is no index prebuilt on this file
 *  (or the index file doesn't match with the base file's checksum;
 * b)  remove it when no matching value is found in existing index file;
 * c)  construct new smaller inputsplits using indexed blocks found
 * in the index file;
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    String inputformat = job.getConfiguration().get(REALINPUTFORMAT);
    String valueClass = job.getConfiguration().get(VALUECLASS);

    List<InputSplit> filteredList = new ArrayList<InputSplit>();

    FileInputFormat<K, V> realInputFormat = getInputFormatClass(inputformat, valueClass);

    List<InputSplit> splits = realInputFormat.getSplits(job);

    //if indexing jobs, don't skip any input splits.
    //if searching job but no searching filter, skip the index as well.
    if (isIndexingJob(job) || getFilterCondition(job) == null)
        return splits;

    Path prevFile = null; // remember the last input file we saw
    boolean foundIndexedFile = false; // is there a index file for
    // prevFile?
    boolean firstTime = true; // is this the first time we see this file?

    long totalOriginalBytes = 0; //the bytes to be scanned without indexes.
    totalBytesNewSplits = 0;
    long startTime = System.currentTimeMillis();
    LOG.info("start filtering out original input splits (total " + splits.size() + ") using indexes");
    Configuration conf = job.getConfiguration();
    long splitMaxSize;

    // for each original input split check if we can filter it out.
    for (InputSplit split : splits) {
        FileSplit fileSplit = (FileSplit) split;
        Path path = fileSplit.getPath();
        splitLength = fileSplit.getLength();
        totalOriginalBytes += fileSplit.getLength();
        splitMaxSize = Math.max(splitLength,
                conf.getInt(INDEXED_SPLIT_SIZE, conf.getInt("dfs.block.size", 256 * 1024 * 1024)));

        /*
         * for each new file we see, we first check if it has been indexed or not;
         * if not, we just add the original input split; if yes, we use the index
         * file to add filtered splits for the file
         */
        if (prevFile != null && path.equals(prevFile)) {
            firstTime = false;
        } else {
            prevFile = path;
            firstTime = true;
            foundIndexedFile = foundIndexFile(job, path);
        }

        // if no index file, we'll have to read all original input
        // splits
        if (!foundIndexedFile)
            filteredList.add(fileSplit);
        else {
            // for each file we only add once its filtered input splits using index
            // file
            if (firstTime) {
                // LOG.info("first time saw " + path
                // + ", adding filtered splits from index file");
                filteredList.addAll(getFilteredSplits(job, path, fileSplit.getLocations(), splitMaxSize));
            }
        }
    }

    long endTime = System.currentTimeMillis();
    LOG.info("finished filtering out input splits, now total splits:" + filteredList.size() + ", seconds used: "
            + (endTime - startTime) / 1000);
    LOG.info(String.format("total bytes to read before filtering: %s," + " after filtering %s, bytes ratio: %s",
            totalOriginalBytes, totalBytesNewSplits, totalOriginalBytes / Math.max(1, totalBytesNewSplits)));
    return filteredList;
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Make a path relative to another path by removing all common ancestors
 * @param parent/*from w ww  . j  a v a  2s  .  co m*/
 * @param descendant
 * @return
 */
private static Path makeRelative(Path parent, Path descendant) {
    Stack<String> components = new Stack<String>();
    while (descendant.depth() > parent.depth()) {
        components.push(descendant.getName());
        descendant = descendant.getParent();
    }
    if (!descendant.equals(parent))
        throw new RuntimeException("descendant not a child of parent");
    if (components.isEmpty())
        return new Path(".");
    Path relative = new Path(components.pop());
    while (!components.isEmpty())
        relative = new Path(relative, components.pop());
    return relative;
}

From source file:fi.tkk.ics.hadoop.bam.BAMInputFormat.java

License:Open Source License

/**
 * Converts the run of consecutive splits that belong to the same file as
 * {@code splits.get(i)} into {@link FileVirtualSplit}s, using that file's
 * splitting index.
 *
 * @param splits    the original splits; each element is cast to {@link FileSplit}
 * @param i         index of the first split of the run to convert
 * @param newSplits receives one {@link FileVirtualSplit} per converted split
 * @param cfg       configuration used to resolve the file system of the file
 * @return the index of the first split that belongs to a different file, or
 *         {@code splits.size()} when the run reaches the end of the list
 * @throws IOException      if the index file cannot be opened or read
 * @throws RuntimeException if the index yields no start or no end position
 *                          for a split
 */
private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    // Shrink splitsEnd to the first split of another file; the loop
    // condition then stops the scan.
    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; ++j) {
        if (!file.equals(((FileSplit) splits.get(j)).getPath())) {
            splitsEnd = j;
        }
    }

    for (int j = i; j < splitsEnd; ++j) {
        final FileSplit fileSplit = (FileSplit) splits.get(j);

        final long start = fileSplit.getStart();
        final long end = start + fileSplit.getLength();

        final Long blockStart = idx.nextAlignment(start);

        // The last split needs to end where the last alignment ends, but the
        // index doesn't store that data (whoops); we only know where the last
        // alignment begins. Fortunately there's no need to change the index
        // format for this: we can just set the end to the maximal length of
        // the final BGZF block (0xffff), and then read until BAMRecordCodec
        // hits EOF.
        //
        // This is deliberately not a conditional expression: with a
        // primitive long operand (the result of "| 0xffff") the whole
        // expression would have type long, and a null lookup result would
        // be unboxed into a NullPointerException before the null check
        // below could report it.
        final Long blockEnd;
        if (j == splitsEnd - 1) {
            final Long lastAlignment = idx.prevAlignment(end);
            if (lastAlignment == null) {
                blockEnd = null;
            } else {
                blockEnd = Long.valueOf(lastAlignment | 0xffff);
            }
        } else {
            blockEnd = idx.nextAlignment(end);
        }

        if (blockStart == null) {
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        }

        if (blockEnd == null) {
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);
        }

        newSplits.add(new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations()));
    }
    return splitsEnd;
}

From source file:fi.tkk.ics.hadoop.bam.util.BGZFSplitFileInputFormat.java

License:Open Source License

/**
 * Re-aligns the run of consecutive splits that belong to the same file as
 * {@code splits.get(i)} to positions taken from that file's block index.
 *
 * @param splits    the original splits; each element is cast to {@link FileSplit}
 * @param i         index of the first split of the run to convert
 * @param newSplits receives one re-aligned {@link FileSplit} per original split
 * @param cfg       configuration used to resolve the file system of the file
 * @return the index of the first split that belongs to a different file, or
 *         {@code splits.size()} when the run reaches the end of the list
 * @throws IOException      if the index file cannot be opened or read
 * @throws RuntimeException if the index yields no start or no end position
 *                          for a split
 */
private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final BGZFBlockIndex idx = new BGZFBlockIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    // Advance past every consecutive split that still refers to the same file.
    final int totalSplits = splits.size();
    int splitsEnd = i;
    while (splitsEnd < totalSplits && file.equals(((FileSplit) splits.get(splitsEnd)).getPath())) {
        ++splitsEnd;
    }

    final int lastOfRun = splitsEnd - 1;
    for (int k = i; k < splitsEnd; ++k) {
        final FileSplit original = (FileSplit) splits.get(k);

        final long start = original.getStart();
        final long end = start + original.getLength();

        final Long blockStart = idx.prevBlock(start);

        // The final split of the run ends at the block found before its end
        // offset; every other split ends at the block found after it.
        final boolean isLastOfRun = k == lastOfRun;
        final Long blockEnd = isLastOfRun ? idx.prevBlock(end) : idx.nextBlock(end);

        if (blockStart == null) {
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        }

        if (blockEnd == null) {
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);
        }

        final long alignedLength = blockEnd - blockStart;
        newSplits.add(new FileSplit(file, blockStart, alignedLength, original.getLocations()));
    }
    return splitsEnd;
}