Example usage for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page collects example usages of the isDirectory() method of org.apache.hadoop.fs.FileStatus.

Prototype

public boolean isDirectory()

Source Link

Document

Is this a directory?

Usage

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Retrieves all the {@link ConfigKeyPath}s that are imported by the given {@link ConfigKeyPath}. This method does this
 * by reading the {@link #INCLUDES_CONF_FILE_NAME} file associated with the dataset specified by the given
 * {@link ConfigKeyPath}. If the {@link Path} described by the {@link ConfigKeyPath} does not exist, then an empty
 * {@link List} is returned./*ww w.  j  a  v  a  2  s .c o  m*/
 *
 * @param  configKey      the config key path whose tags are needed
 * @param  version        the configuration version in the configuration store.
 *
 * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset
 * specified by the configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version,
        Optional<Config> runtimeConfig) throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> configKeyPaths = new ArrayList<>();
    Path datasetDir = getDatasetDirForKey(configKey, version);
    Path includesFile = new Path(datasetDir, INCLUDES_CONF_FILE_NAME);

    try {
        if (!this.fs.exists(includesFile)) {
            return configKeyPaths;
        }

        FileStatus includesFileStatus = this.fs.getFileStatus(includesFile);
        if (!includesFileStatus.isDirectory()) {
            try (InputStream includesConfInStream = this.fs.open(includesFileStatus.getPath())) {
                /*
                 * The includes returned are used to build a fallback chain.
                 * With the natural order, if a key found in the first include it is not be overriden by the next include.
                 * By reversing the list, the Typesafe fallbacks are constructed bottom up.
                 */
                configKeyPaths.addAll(Lists.newArrayList(Iterables.transform(
                        Lists.reverse(resolveIncludesList(
                                IOUtils.readLines(includesConfInStream, Charsets.UTF_8), runtimeConfig)),
                        new IncludesToConfigKey())));
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }

    return configKeyPaths;
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Loads the {@link Config} owned by the given {@link ConfigKeyPath} by parsing, as UTF-8 text, the
 * {@link #MAIN_CONF_FILE_NAME} file found in the dataset directory resolved for the given key and version.
 *
 * <p>
 *   An empty {@link Config} is returned when that file does not exist, or when the entry found at its location
 *   is a directory.
 * </p>
 *
 * @param  configKey      the config key path whose properties are needed; must not be {@code null}.
 * @param  version        the configuration version in the configuration store; must not be {@code null} or empty.
 *
 * @return a {@link Config} for the given configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 * @throws RuntimeException if an {@link IOException} occurs while checking, opening or reading the file.
 */
@Override
public Config getOwnConfig(ConfigKeyPath configKey, String version) throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    Path mainConfFile = new Path(getDatasetDirForKey(configKey, version), MAIN_CONF_FILE_NAME);

    try {
        if (!this.fs.exists(mainConfFile)) {
            return ConfigFactory.empty();
        }

        FileStatus mainConfStatus = this.fs.getFileStatus(mainConfFile);
        if (mainConfStatus.isDirectory()) {
            // Nothing to parse when a directory sits where the main conf file is expected.
            return ConfigFactory.empty();
        }

        try (InputStream confStream = this.fs.open(mainConfStatus.getPath())) {
            return ConfigFactory.parseReader(new InputStreamReader(confStream, Charsets.UTF_8));
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java

License:Apache License

/**
 * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory on HDFS. It does
 * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
 * contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It the assumes this {@link Path} is the root
 * directory./*from   w w  w. ja  v a  2  s. c  o m*/
 *
 * <p>
 *   If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
 *   not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
 *   the default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME} then a
 *   {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (getDefaultStoreURILazy() != null) {
            return getDefaultStoreURILazy();
        } else if (isAuthorityRequired()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
    }

    Path path = new Path(configKey.getPath());

    while (path != null) {
        try {
            // the abs URI may point to an unexist path for
            // 1. phantom node
            // 2. as URI did not specify the version
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory() && fileStatus.getPath().getName()
                            .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }

        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStore.java

License:Apache License

/**
 * Retrieves all the {@link ConfigKeyPath}s that are imported by the given {@link ConfigKeyPath}. The imports are
 * read from the {@link #INCLUDES_CONF_FILE_NAME} file located in the dataset directory resolved for the given
 * key and version.
 *
 * <p>
 *   An empty {@link List} is returned when the includes file does not exist, or when the entry found at its
 *   location is a directory.
 * </p>
 *
 * @param  configKey      the config key path whose imports are needed; must not be {@code null}.
 * @param  version        the configuration version in the configuration store; must not be {@code null} or empty.
 *
 * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset
 * specified by the configKey, in the reverse of the order produced by {@code resolveIncludesList}.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 * @throws RuntimeException if an {@link IOException} occurs while checking, opening or reading the includes file.
 */
@Override
public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version)
        throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> imports = new ArrayList<>();
    Path includesFile = new Path(getDatasetDirForKey(configKey, version), INCLUDES_CONF_FILE_NAME);

    try {
        if (!this.fs.exists(includesFile)) {
            return imports;
        }

        FileStatus includesStatus = this.fs.getFileStatus(includesFile);
        if (includesStatus.isDirectory()) {
            // A directory at the includes-file location carries no imports.
            return imports;
        }

        try (InputStream includesStream = this.fs.open(includesStatus.getPath())) {
            /*
             * The includes feed a fallback chain. In natural order a key found in an earlier include
             * would not be overridden by a later one, so the list is reversed to let the Typesafe
             * fallbacks be constructed bottom up.
             */
            imports.addAll(Lists.newArrayList(Iterables.transform(
                    Lists.reverse(
                            resolveIncludesList(IOUtils.readLines(includesStream, Charsets.UTF_8))),
                    new IncludesToConfigKey())));
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }

    return imports;
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java

License:Apache License

/**
 * Determines the physical location of the {@link SimpleHDFSConfigStore} root directory. Starting at the path of the
 * {@link URI} given by the user and moving up through its parents, the first existing directory that contains a
 * sub-directory named {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME} is assumed to be the store root.
 *
 * <p>
 *   If the given configKey does not have an authority, no back-tracing takes place: the configured default store
 *   URI is returned, and a {@link ConfigStoreCreationException} is thrown when no default store has been
 *   configured.
 * </p>
 *
 * @param  fs         the file system used to probe and list the candidate directories.
 * @param  configKey  the user supplied {@link URI} to locate the store root for.
 *
 * @return the {@link URI} of the store root, i.e. the parent of the matching store directory resolved against
 * {@link FileSystem#getUri()}, or the default store URI.
 *
 * @throws ConfigStoreCreationException if no default store is configured for an authority-less key, if the file
 * system cannot be read, or if no store root is found up to the top of the path.
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (hasDefaultStoreURI()) {
            return this.defaultStoreURI.get();
        }
        throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
    }

    for (Path candidate = new Path(configKey.getPath()); candidate != null; candidate = candidate.getParent()) {
        try {
            // The absolute URI may point to a non-existent path because
            // 1. it refers to a phantom node, or
            // 2. the URI did not specify the version.
            if (!fs.exists(candidate)) {
                continue;
            }
            for (FileStatus child : fs.listStatus(candidate)) {
                boolean isStoreDir = child.isDirectory()
                        && child.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME);
                if (isStoreDir) {
                    return fs.getUri().resolve(child.getPath().getParent().toUri());
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Adds the files referenced by a delimited list of paths to the job classpath through the
 * {@link DistributedCache}.
 *
 * <p>
 *   Each entry of the list is listed on the file system. Every non-directory result is added to the classpath;
 *   sub-directories are skipped and not descended into. An entry may name either a directory holding the files
 *   or a single file: listing a file yields that file itself, in which case the entry path is used as is instead
 *   of being treated as a parent directory.
 * </p>
 *
 * @param  hdfsJarFileList  the list of paths, split with {@code SPLITTER}.
 * @param  conf             the job configuration the classpath entries are recorded in.
 *
 * @throws IOException if listing a path or registering a file fails.
 */
private void addHdfsJars(String hdfsJarFileList, Configuration conf) throws IOException {
    for (String jarFile : SPLITTER.split(hdfsJarFileList)) {
        Path jarPath = new Path(jarFile);
        Path qualifiedJarPath = this.fs.makeQualified(jarPath);
        for (FileStatus fileStatus : this.fs.listStatus(jarPath)) {
            if (fileStatus.isDirectory()) {
                continue;
            }
            // A listed path equal to the entry itself means the entry is a file, not a directory;
            // appending its own name would yield the non-existent "<file>/<file-name>".
            Path path = qualifiedJarPath.equals(fileStatus.getPath())
                    ? jarPath
                    : new Path(jarPath, fileStatus.getPath().getName());
            LOG.info(String.format("Adding %s to classpath", path));
            DistributedCache.addFileToClassPath(path, conf, this.fs);
        }
    }
}

From source file:gobblin.runtime.spec_store.FSSpecStore.java

License:Apache License

/**
 * Recursively collects the specs stored under a directory.
 *
 * <p>
 *   Entries are visited in the order returned by the file system listing. A sub-directory is fully processed at
 *   the position it is encountered; every other entry is read with {@code readSpecFromFile} and appended to the
 *   given collection.
 * </p>
 *
 * @param  directory  the directory to scan.
 * @param  specs      the collection the specs that are read get added to.
 *
 * @throws IOException if listing a directory or reading a spec fails.
 */
private void getSpecs(Path directory, Collection<Spec> specs) throws IOException {
    for (FileStatus entry : fs.listStatus(directory)) {
        Path entryPath = entry.getPath();
        if (!entry.isDirectory()) {
            specs.add(readSpecFromFile(entryPath));
            continue;
        }
        getSpecs(entryPath, specs);
    }
}

From source file:gobblin.source.extractor.extract.google.GoogleDriveFsHelper.java

License:Apache License

/**
 * List files under folder ID recursively. Folder won't be included in the result. If there's no files under folder ID, it returns empty list.
 * If folder ID is not defined, it will provide files under root directory.
 * {@inheritDoc}/*from ww  w .j  a  va2  s  . c  o m*/
 * @see gobblin.source.extractor.filebased.FileBasedHelper#ls(java.lang.String)
 */
@Override
public List<String> ls(String folderId) throws FileBasedHelperException {
    List<String> result = new ArrayList<>();
    if (StringUtils.isEmpty(folderId)) {
        folderId = "/";
    }

    Path p = new Path(folderId);
    FileStatus[] statusList = null;
    try {
        statusList = fileSystem.listStatus(p);
    } catch (FileNotFoundException e) {
        return result;
    } catch (IOException e) {
        throw new FileBasedHelperException("Falied to list status on path " + p + ", folderID: " + folderId, e);
    }

    for (FileStatus status : statusList) {
        if (status.isDirectory()) {
            result.addAll(ls(GoogleDriveFileSystem.toFileId(status.getPath())));
        } else {
            result.add(GoogleDriveFileSystem.toFileId(status.getPath()));
        }
    }
    return result;
}

From source file:gobblin.source.extractor.hadoop.HadoopFsHelper.java

License:Apache License

/**
 * Recursively lists the files below a path and appends their string form to the given list.
 *
 * <p>
 *   When the path denotes a file, only that path is appended. The method returns right away in that case:
 *   listing a file yields the file itself, so carrying on would append the same file a second time.
 *   When the path denotes a directory, every file found below it is appended and every sub-directory is
 *   descended into; directories themselves are never appended.
 * </p>
 *
 * @param  p        the file or directory to list.
 * @param  results  the list the paths of the files found are appended to.
 *
 * @throws IOException if the status of the path cannot be retrieved or a directory cannot be listed.
 */
public void lsr(Path p, List<String> results) throws IOException {
    if (!this.fs.getFileStatus(p).isDirectory()) {
        results.add(p.toString());
        return;
    }

    Path qualifiedPath = this.fs.makeQualified(p);
    for (FileStatus status : this.fs.listStatus(p)) {
        if (!status.isDirectory()) {
            results.add(status.getPath().toString());
            continue;
        }
        // Fix for hadoop issue: https://issues.apache.org/jira/browse/HADOOP-12169
        // Skip a listed directory that is the directory being listed, to avoid endless recursion.
        if (!qualifiedPath.equals(status.getPath())) {
            lsr(status.getPath(), results);
        }
    }
}

From source file:gobblin.source.RegexBasedPartitionedRetriever.java

License:Apache License

/**
 * Collects the direct sub-directories of {@code sourceDir} whose watermark is greater than the given minimum.
 *
 * <p>
 *   The watermark of a sub-directory is derived from its name. Non-directory entries are skipped, as are
 *   sub-directories for which deriving the watermark raises an {@link IllegalArgumentException} and
 *   sub-directories whose watermark does not exceed {@code minWatermark}.
 * </p>
 *
 * @param  fs            the file system used to list {@code sourceDir}.
 * @param  minWatermark  the exclusive lower bound for the watermark of the returned directories.
 *
 * @return the matching directories, sorted with {@link Collections#sort(List)}.
 *
 * @throws IOException if {@code sourceDir} cannot be listed.
 */
private List<FileInfo> getOuterDirectories(FileSystem fs, long minWatermark) throws IOException {
    LOGGER.debug("Listing contents of {}", sourceDir);

    List<FileInfo> outerDirectories = new ArrayList<>();

    for (FileStatus entry : fs.listStatus(sourceDir)) {
        if (entry.isDirectory()) {
            try {
                String directoryName = entry.getPath().getName();
                long watermark = getWatermarkFromString(extractWatermarkFromDirectory(directoryName));
                if (watermark <= minWatermark) {
                    LOGGER.info("Ignoring directory {} - watermark {} is less than minWatermark {}",
                            entry.getPath(), watermark, minWatermark);
                } else {
                    LOGGER.info("Processing directory {} with watermark {}", entry.getPath(), watermark);
                    outerDirectories.add(new FileInfo(entry.getPath().toString(), 0, watermark));
                }
            } catch (IllegalArgumentException e) {
                LOGGER.info("Directory {} ({}) does not match pattern {}; skipping", entry.getPath().getName(),
                        entry.getPath(), this.pattern.toString());
            }
        } else {
            LOGGER.debug("Skipping non-directory {}", entry.getPath().toUri());
        }
    }

    Collections.sort(outerDirectories);
    return outerDirectories;
}