Example usage for org.apache.hadoop.fs.Path.equals

List of usage examples for org.apache.hadoop.fs.Path.equals


On this page you can find example usages of org.apache.hadoop.fs.Path.equals.


    public boolean equals(Object o) 

Source Link


From source file:cascading.tap.Hfs.java

License:Open Source License

/**
 * Registers this tap's qualified path as an input path on the given job configuration.
 *
 * <p>The path is rejected if it is already among the input paths configured on
 * {@code conf}; otherwise it is added via {@code FileInputFormat.addInputPath}.
 *
 * @param conf job configuration that receives the input path
 * @throws IOException declared by the signature; NOTE(review): presumably raised while
 *         resolving or registering the path - confirm against the helpers called here
 */
public void sourceInit(JobConf conf) throws IOException {
    Path qualifiedPath = getQualifiedPath(conf);

    // Path.equals is used to detect a path that was already registered as an input.
    for (Path exitingPath : FileInputFormat.getInputPaths(conf)) {
        if (exitingPath.equals(qualifiedPath))
            throw new TapException("may not add duplicate paths, found: " + exitingPath);
    }

    FileInputFormat.addInputPath(conf, qualifiedPath);


    // NOTE(review): presumably switches the job to local mode when the path calls for it -
    // confirm against makeLocal.
    makeLocal(conf, qualifiedPath, "forcing job to local mode, via source: ");

    TupleSerialization.setSerializations(conf); // allows Hfs to be used independent of Flow

From source file:cascading.tap.hive.HiveTap.java

License:Open Source License

public boolean resourceExists(JobConf conf) throws IOException {
    IMetaStoreClient metaStoreClient = null;
    try {//from   w w w .  j av a  2 s .c  o m
        metaStoreClient = createMetaStoreClient();
        Table table = metaStoreClient.getTable(tableDescriptor.getDatabaseName(),
        modifiedTime = table.getLastAccessTime();
        // check if the schema matches the table descriptor. If not, throw an exception.
        if (strict) {
            LOG.info("strict mode: comparing existing hive table with table descriptor");
            if (!table.getTableType().equals(tableDescriptor.toHiveTable().getTableType()))
                throw new HiveTableValidationException(
                        String.format("expected a table of type '%s' but found '%s'",
                                tableDescriptor.toHiveTable().getTableType(), table.getTableType()));

            // Check that the paths are the same
            FileSystem fs = FileSystem.get(conf);
            StorageDescriptor sd = table.getSd();
            Path expectedPath = fs.makeQualified(
                    new Path(tableDescriptor.getLocation(hiveConf.getVar(ConfVars.METASTOREWAREHOUSE))));
            Path actualPath = fs.makeQualified(new Path(sd.getLocation()));

            if (!expectedPath.equals(actualPath))
                throw new HiveTableValidationException(
                        String.format("table in MetaStore does not have the sampe path. Expected %s got %s",
                                expectedPath, actualPath));

            List<FieldSchema> schemaList = sd.getCols();
            if (schemaList.size() != tableDescriptor.getColumnNames().length
                    - tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of columns. expected %d got %d",
                        tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length,
            for (int index = 0; index < schemaList.size(); index++) {
                FieldSchema schema = schemaList.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column name '%s', but found '%s'",
                                    expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column type '%s', but found '%s'",
                                    expectedColumnType, schema.getType()));
            List<FieldSchema> schemaPartitions = table.getPartitionKeys();
            if (schemaPartitions.size() != tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of partition columns. expected %d got %d",
                        tableDescriptor.getPartitionKeys().length, schemaPartitions.size()));
            int offset = tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length;
            for (int index = 0; index < schemaPartitions.size(); index++) {
                FieldSchema schema = schemaPartitions.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index + offset];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index + offset];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column name '%s', but found '%s'",
                            expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column type '%s', but found '%s'",
                            expectedColumnType, schema.getType()));
        return true;
    } catch (MetaException exception) {
        throw new IOException(exception);
    } catch (NoSuchObjectException exception) {
        return false;
    } catch (TException exception) {
        throw new IOException(exception);
    } finally {
        if (metaStoreClient != null)

From source file:com.alexholmes.hdfsslurper.Configurator.java

License:Apache License

/**
 * Validates that two paths live on the same file system and are not the same path.
 *
 * @param p1     first path to check
 * @param p2     second path to check
 * @param config configuration used to resolve each path's {@code FileSystem}
 * @throws IOException            declared by the signature; NOTE(review): presumably from
 *                                {@code Path.getFileSystem} - confirm
 * @throws ConfigSettingException if {@code compareFs} reports the two file systems as
 *                                different, or if {@code p1.equals(p2)}
 */
public static void validateSameFileSystem(Path p1, Path p2, Configuration config)
        throws IOException, ConfigSettingException {
    FileSystem fs1 = p1.getFileSystem(config);
    FileSystem fs2 = p2.getFileSystem(config);
    // compareFs decides what "same file system" means; its criteria are not visible here.
    if (!compareFs(fs1, fs2)) {
        throw new ConfigSettingException("The two paths must exist on the same file system: " + p1 + "," + p2);
    }

    // Identical paths are rejected: the two locations must be distinct.
    if (p1.equals(p2)) {
        throw new ConfigSettingException("The paths must be distinct: " + p1);

From source file:com.aliyun.fs.oss.common.InMemoryFileSystemStore.java

License:Apache License

/**
 * Lists the stored paths whose parent is the given path (its direct children).
 *
 * @param path directory path to list; it is normalized before comparison
 * @return the matching paths, in the order they are encountered while iterating
 *         {@code inodes}
 * @throws IOException declared by the signature; nothing in the visible body throws it
 */
public Set<Path> listSubPaths(Path path) throws IOException {
    Path normalizedPath = normalize(path);
    // This is inefficient but more than adequate for testing purposes.
    Set<Path> subPaths = new LinkedHashSet<Path>();
    // NOTE(review): assumes inodes is a sorted map, so tailMap yields only keys at or after
    // normalizedPath - confirm against the field declaration.
    for (Path p : inodes.tailMap(normalizedPath).keySet()) {
        // Keep only direct children: entries whose parent equals the requested path.
        if (normalizedPath.equals(p.getParent())) {
            subPaths.add(p);
    return subPaths;

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceCore.java

License:Apache License

/**
 * Tells whether a file status refers to the temporary path or to something that
 * {@code HadoopDataSourceUtil.contains(temporary, path)} reports as contained in it.
 *
 * @param stat      file status whose path is tested; must not be null
 * @param temporary the temporary path to test against; must not be null
 * @return {@code true} if the status path equals {@code temporary} or is reported as
 *         contained by it, {@code false} otherwise
 */
private boolean isIn(FileStatus stat, Path temporary) {
    // Null checks are plain assertions, so they only run when assertions are enabled.
    assert stat != null;
    assert temporary != null;
    Path path = stat.getPath();
    // NOTE(review): contains(...) presumably means "path lies under temporary" - confirm
    // against HadoopDataSourceUtil.
    if (path.equals(temporary) || HadoopDataSourceUtil.contains(temporary, path)) {
        return true;
    }
    return false;

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceCore.java

License:Apache License

/**
 * Filters a list of file statuses, dropping the root itself and every entry that
 * {@code isIn(stat, temporary)} reports as belonging to the temporary path.
 *
 * @param stats     statuses to filter; must not be null
 * @param root      root path; an entry whose path equals it is excluded
 * @param temporary temporary path; entries for which {@code isIn} is true are excluded
 * @return a new list holding the remaining statuses in their original order
 */
private List<FileStatus> normalize(List<FileStatus> stats, Path root, Path temporary) {
    // Null checks are plain assertions, so they only run when assertions are enabled.
    assert stats != null;
    assert root != null;
    assert temporary != null;
    List<FileStatus> results = new ArrayList<>();
    for (FileStatus stat : stats) {
        // Keep the entry only if it is neither the root nor part of the temporary area.
        if (root.equals(stat.getPath()) == false && isIn(stat, temporary) == false) {
            results.add(stat);
    return results;

From source file:com.blackberry.logdriver.admin.LogMaintenance.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }/*from   ww  w .j  a va2  s.c o  m*/

    // For some reason, Oozie needs some options to be set as system properties
    // instead of in the configuration. So copy the configs over.
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());

    if (args.length < 3) {
        return 1;

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    if (args.length >= 5) {
        hour = args[4];

    // Set from environment variables
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMR = getConfOrEnv(conf, "MAX_CONCURRENT_MR", "-1");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs", "true"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");
    String maxTotalMR = getConfOrEnv(conf, "MAX_TOTAL_MR", "-1");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set.  Exiting.");
        return 1;
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set.  Not merging.");
        doMerge = false;
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set.  Not archiving.");
        doArchive = false;
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set.  Not archiving.");
        doArchive = false;
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative.  Not archiving.");
        doArchive = false;
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set.  Not deleting.");
        doDelete = false;
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative.  Not deleting.");
        doDelete = false;
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set.  Using default value of 'logs'.");
        logdir = "logs";
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set.  Using default value of 'service'.");
        rootDir = "/service";

    // We can hang if this fails. So make sure we abort if it fails.
    fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.exists(new Path("/")); // Test if it works.
    } catch (IOException e) {
        LOG.error("Error getting filesystem.", e);
        return 1;

    // Create the LockUtil instance
    lockUtil = new LockUtil(zkConnectString);

    // Now it's safe to create our Job Runner
    JobRunner jobRunner = new JobRunner(Integer.parseInt(maxConcurrentMR), Integer.parseInt(maxTotalMR));
    Thread jobRunnerThread = new Thread(jobRunner);

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them
    // all.
    try {

        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");

        // Do a depth-first search of the directory, processing anything that
        // looks interesting along the way.
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug("    Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug("    Skipping, since it's not old enough.");

            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info("    Deleting empty directory {}", p.toString());
                fs.delete(p, true);

            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {

                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug("    Subdir {} is too young.", child.getPath());

                        if (!hasMatchingChildren) {
                            LOG.debug("    No files match the expected pattern ({})", VALID_FILE.pattern());

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info("    Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                    dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties jobProps = new Properties();
                            jobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            jobProps.setProperty("jobType", "merge");
                            jobProps.setProperty("rootDir", rootDir);
                            jobProps.setProperty("dcNumber", dcNumber);
                            jobProps.setProperty("service", service);
                            jobProps.setProperty("date", matchDate);
                            jobProps.setProperty("hour", matchHour);
                            jobProps.setProperty("component", matchComponent);
                            jobProps.setProperty("user.name", userName);
                            jobProps.setProperty("logdir", logdir);


                            addChildren = false;

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {

                        Properties jobProps = new Properties();
                        jobProps.load(new FileInputStream(filterJobPropertiesFile));

                        jobProps.setProperty("jobType", "filter");
                        jobProps.setProperty("rootDir", rootDir);
                        jobProps.setProperty("dcNumber", dcNumber);
                        jobProps.setProperty("service", service);
                        jobProps.setProperty("date", matchDate);
                        jobProps.setProperty("hour", matchHour);
                        jobProps.setProperty("component", matchComponent);
                        jobProps.setProperty("user.name", userName);
                        jobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = jobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(
                                appPath + "/" + conf.get("filter.definition.file", service + ".yaml"));
                        LOG.info("Filter file is {}", filterFile);
                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,

                            if (filters == null) {
                                        "    Got null when getting filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn("    Got no filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info("    Keeping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // Move files from data to archive
                                // delete it all!
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                        + "/" + matchDate + "/" + matchHour + "/" + matchComponent
                                        + "/archive/";

                                PathInfo pathInfo = new PathInfo();

                                try {
                                    fs.mkdirs(new Path(destination));
                                    for (FileStatus f : fs.listStatus(p)) {
                                        fs.rename(f.getPath(), new Path(destination));
                                } finally {
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info("    Dropping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });

                                PathInfo pathInfo = new PathInfo();

                                try {
                                    fs.delete(p, true);
                                } finally {

                            } else {
                                LOG.info("    Run Filter/Archive job {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");

                        addChildren = false;

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info("  Matches working pattern ({})", p);
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);

                        // Move everything from working/xxx/incoming/ to incoming/
                        PathInfo lockPathInfo = new PathInfo(logdir, rootDir + "/" + dcNumber + "/" + service
                                + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);

                        FileStatus[] fileStatuses = fs.listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                        if (fileStatuses != null) {
                            for (FileStatus fileStatus : fileStatuses) {
                                Path toPath = new Path(
                                        "incoming/" + fileStatus.getPath().getName());

                                LOG.info("  Moving data from {} to {}", fileStatus.getPath(), toPath);
                                LOG.info("    mkdir {}", toPath);

                                Path fromDir = new Path(p.toUri().getPath(),
                                        "incoming/" + fileStatus.getPath().getName());
                                LOG.info("    moving from {}", fromDir);
                                FileStatus[] files = fs.listStatus(fromDir);
                                if (files == null || files.length == 0) {
                                    LOG.info("    Nothing to move from  {}", fromDir);
                                } else {
                                    for (FileStatus f : files) {
                                        LOG.info("    rename {} {}", f.getPath(),
                                                new Path(toPath, f.getPath().getName()));
                                        fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));

                                LOG.info("    rm {}", fileStatus.getPath());
                                fs.delete(fileStatus.getPath(), true);

                            fs.delete(new Path(p.toUri().getPath()), true);

                    addChildren = false;

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDirectory()) {

        // Since we may have deleted a bunch of directories, delete any unused
        // locks from ZooKeeper.
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;

            List<LockInfo> lockInfo = lockUtil.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lockUtil.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. just stop trying then.
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));


        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        LOG.info("Job Stats : Started={} Succeeded={} failed={} errors={}",
                new Object[] { jobRunner.getStarted(), jobRunner.getSucceeded(), jobRunner.getFailed(),
                        jobRunner.getErrors() });


    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;

    return 0;

From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java

License:Apache License

public boolean rename(Path src, Path dst) throws IOException {
    Path absoluteSrc = makeAbsolute(src);
    final String debugPreamble = "Renaming '" + src + "' to '" + dst + "' - ";
    INode srcINode = store.retrieveINode(absoluteSrc);
    boolean debugEnabled = LOG.isDebugEnabled();
    if (srcINode == null) {
        // src path doesn't exist
        if (debugEnabled) {
            LOG.debug(debugPreamble + "returning false as src does not exist");
        }// ww w  .j a v a 2  s . c o  m
        return false;

    Path absoluteDst = makeAbsolute(dst);

    //validate the parent dir of the destination
    Path dstParent = absoluteDst.getParent();
    if (dstParent != null) {
        //if the dst parent is not root, make sure it exists
        INode dstParentINode = store.retrieveINode(dstParent);
        if (dstParentINode == null) {
            // dst parent doesn't exist
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent does not exist");
            return false;
        if (dstParentINode.isFile()) {
            // dst parent exists but is a file
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent exists and is a file");
            return false;

    //get status of source
    boolean srcIsFile = srcINode.isFile();

    INode dstINode = store.retrieveINode(absoluteDst);
    boolean destExists = dstINode != null;
    boolean destIsDir = destExists && !dstINode.isFile();
    if (srcIsFile) {

        //source is a simple file
        if (destExists) {
            if (destIsDir) {
                //outcome #1 dest exists and is dir -filename to subdir of dest
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src file under dest dir to " + absoluteDst);
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
            } else {
                //outcome #2 dest it's a file: fail iff different from src
                boolean renamingOnToSelf = absoluteSrc.equals(absoluteDst);
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying file onto file, outcome is " + renamingOnToSelf);
                return renamingOnToSelf;
        } else {
            // #3 dest does not exist: use dest as path for rename
            if (debugEnabled) {
                LOG.debug(debugPreamble + "copying file onto file");
    } else {
        //here the source exists and is a directory
        // outcomes (given we know the parent dir exists if we get this far)
        // #1 destination is a file: fail
        // #2 destination is a directory: create a new dir under that one
        // #3 destination doesn't exist: create a new dir with that name
        // #3 and #4 are only allowed if the dest path is not == or under src

        if (destExists) {
            if (!destIsDir) {
                // #1 destination is a file: fail
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "returning false as src is a directory, but not dest");
                return false;
            } else {
                // the destination dir exists
                // destination for rename becomes a subdir of the target name
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src dir under dest dir to " + absoluteDst);
        //the final destination directory is now know, so validate it for
        //illegal moves

        if (absoluteSrc.equals(absoluteDst)) {
            //you can't rename a directory onto itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "Dest==source && isDir -failing");
            return false;
        if (absoluteDst.toString().startsWith(absoluteSrc.toString() + "/")) {
            //you can't move a directory under itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "dst is equal to or under src dir -failing");
            return false;
    //here the dest path is set up -so rename
    return renameRecursive(absoluteSrc, absoluteDst);

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.SingleStreamFileSystem.java

License:Apache License

public FSDataInputStream open(final Path f, final int bufferSize) throws IOException {
    if (f.equals(path)) {
        return inputStream;
    }/*from   w ww.  j ava2  s . c  o m*/
    throw new UnsupportedOperationException("Path " + f.getName() + " is not found");

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.BaseClient.java

License:Apache License

protected FileHandle lookup(Path path) throws NFS4Exception {
    Path parent;//from  ww w.  j av  a2s. c o  m
    LOGGER.info("Lookup on " + path);
    if (path.equals(ROOT)) {
        parent = path;
    } else {
        parent = path.getParent();
    FileHandle parentFileHandle = mPathFileHandleMap.get(parent);
    if (parentFileHandle == null) {
        parentFileHandle = lookup(parent);

    if (parent.equals(path)) {
        return parentFileHandle;

    CompoundRequest compoundRequest = newRequest();
    List<OperationRequest> operations = Lists.newArrayList();
    PUTFHRequest putFhRequest = new PUTFHRequest();
    LOOKUPRequest lookupRequest = new LOOKUPRequest();

    operations.add(new GETFHRequest());


    List<OperationResponse> operationResponses = getResult(compoundRequest);

    getResponse(operationResponses.remove(0), PUTFHResponse.class);
    getResponse(operationResponses.remove(0), LOOKUPResponse.class);
    GETFHResponse getFHResponse = getResponse(operationResponses.remove(0), GETFHResponse.class);
    FileHandle fileHandle = getFHResponse.getFileHandle();
    mPathFileHandleMap.put(path, fileHandle);
    mFileHandlePathMap.put(fileHandle, path);
    GETATTRResponse getAttrResponse = getResponse(operationResponses.remove(0), GETATTRResponse.class);
    mFileHandleAttributeMap.put(fileHandle, getAttrResponse.getAttrValues());
    return fileHandle;