Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page collects example usages of the isDirectory() method of org.apache.hadoop.fs.FileStatus.

Prototype

public boolean isDirectory() 

Source Link

Document

Is this a directory?

Usage

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Retrieves all the {@link ConfigKeyPath}s that are imported by the given {@link ConfigKeyPath}. This method does this
 * by reading the {@link #INCLUDES_CONF_FILE_NAME} file associated with the dataset specified by the given
 * {@link ConfigKeyPath}. If the {@link Path} described by the {@link ConfigKeyPath} does not exist, then an empty
 * {@link List} is returned./*ww w.  j  a  v  a  2  s .c o  m*/
 *
 * @param  configKey      the config key path whose tags are needed
 * @param  version        the configuration version in the configuration store.
 *
 * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset
 * specified by the configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version,
        Optional<Config> runtimeConfig) throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> configKeyPaths = new ArrayList<>();
    Path datasetDir = getDatasetDirForKey(configKey, version);
    Path includesFile = new Path(datasetDir, INCLUDES_CONF_FILE_NAME);

    try {
        if (!this.fs.exists(includesFile)) {
            return configKeyPaths;
        }

        FileStatus includesFileStatus = this.fs.getFileStatus(includesFile);
        if (!includesFileStatus.isDirectory()) {
            try (InputStream includesConfInStream = this.fs.open(includesFileStatus.getPath())) {
                /*
                 * The includes returned are used to build a fallback chain.
                 * With the natural order, if a key found in the first include it is not be overriden by the next include.
                 * By reversing the list, the Typesafe fallbacks are constructed bottom up.
                 */
                configKeyPaths.addAll(Lists.newArrayList(Iterables.transform(
                        Lists.reverse(resolveIncludesList(
                                IOUtils.readLines(includesConfInStream, Charsets.UTF_8), runtimeConfig)),
                        new IncludesToConfigKey())));
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }

    return configKeyPaths;
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Loads the {@link Config} owned by the given {@link ConfigKeyPath}. The configuration is parsed from the
 * {@link #MAIN_CONF_FILE_NAME} file located in the dataset directory resolved for the key and version. When that
 * file is missing, or a directory is found in its place, an empty {@link Config} is returned.
 *
 * @param  configKey      the config key path whose properties are needed.
 * @param  version        the configuration version in the configuration store.
 *
 * @return a {@link Config} for the given configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
@Override
public Config getOwnConfig(ConfigKeyPath configKey, String version) throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    Path mainConfFile = new Path(getDatasetDirForKey(configKey, version), MAIN_CONF_FILE_NAME);

    try {
        // Nothing stored for this key.
        if (!this.fs.exists(mainConfFile)) {
            return ConfigFactory.empty();
        }

        // Only a regular file can hold the configuration.
        FileStatus status = this.fs.getFileStatus(mainConfFile);
        if (status.isDirectory()) {
            return ConfigFactory.empty();
        }

        try (InputStream confStream = this.fs.open(status.getPath())) {
            return ConfigFactory.parseReader(new InputStreamReader(confStream, Charsets.UTF_8));
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java

License:Apache License

/**
 * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory on HDFS. It does
 * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
 * contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It the assumes this {@link Path} is the root
 * directory./*from   w w  w. ja  v a  2  s. c  o m*/
 *
 * <p>
 *   If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
 *   not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
 *   the default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME} then a
 *   {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (getDefaultStoreURILazy() != null) {
            return getDefaultStoreURILazy();
        } else if (isAuthorityRequired()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
    }

    Path path = new Path(configKey.getPath());

    while (path != null) {
        try {
            // the abs URI may point to an unexist path for
            // 1. phantom node
            // 2. as URI did not specify the version
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory() && fileStatus.getPath().getName()
                            .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }

        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStore.java

License:Apache License

/**
 * Collects the {@link ConfigKeyPath}s imported by the given {@link ConfigKeyPath}. The imports are read from the
 * {@link #INCLUDES_CONF_FILE_NAME} file in the dataset directory resolved for the key and version. When that file is
 * missing, or a directory is found in its place, an empty {@link List} is returned.
 *
 * @param  configKey      the config key path whose imports are needed
 * @param  version        the configuration version in the configuration store.
 *
 * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset
 * specified by the configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
@Override
public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version)
        throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> imports = new ArrayList<>();
    Path includesFile = new Path(getDatasetDirForKey(configKey, version), INCLUDES_CONF_FILE_NAME);

    try {
        // No includes file: the dataset imports nothing.
        if (!this.fs.exists(includesFile)) {
            return imports;
        }

        // A directory at the includes location is ignored.
        FileStatus status = this.fs.getFileStatus(includesFile);
        if (status.isDirectory()) {
            return imports;
        }

        try (InputStream includesStream = this.fs.open(status.getPath())) {
            /*
             * The imports form a fallback chain. In natural order a key found in an earlier include
             * wins over later includes, so the list is reversed to let the Typesafe fallbacks be
             * assembled bottom up.
             */
            imports.addAll(Lists.newArrayList(Iterables.transform(
                    Lists.reverse(
                            resolveIncludesList(IOUtils.readLines(includesStream, Charsets.UTF_8))),
                    new IncludesToConfigKey())));
        }
    } catch (IOException e) {
        throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey),
                e);
    }

    return imports;
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java

License:Apache License

/**
 * Locates the root directory of the {@link SimpleHDFSConfigStore}. The path of the user supplied {@link URI} is
 * walked upwards, starting with the path itself, until a directory is found that contains a folder named
 * {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}; that directory is taken to be the store root.
 *
 * <p>
 *   If the given configKey does not have an authority, the path is not searched at all: the configured default
 *   store URI is returned, or a {@link ConfigStoreCreationException} is thrown when no default store URI has been
 *   configured.
 * </p>
 *
 * @param fs         the {@link FileSystem} used to probe the path and its ancestors.
 * @param configKey  the {@link URI} supplied by the user.
 *
 * @return the {@link URI} of the store root, resolved against {@link FileSystem#getUri()}.
 *
 * @throws ConfigStoreCreationException if no default store is configured when one is needed, if an
 * {@link IOException} occurs while probing the file system, or if no ancestor contains the store folder.
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (hasDefaultStoreURI()) {
            return this.defaultStoreURI.get();
        }
        throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
    }

    for (Path candidate = new Path(configKey.getPath()); candidate != null; candidate = candidate.getParent()) {
        try {
            // The URI may name a path that does not exist (a phantom node, or a URI without a version),
            // in which case there is nothing to list and the search moves on to the parent.
            if (!fs.exists(candidate)) {
                continue;
            }
            for (FileStatus child : fs.listStatus(candidate)) {
                boolean isStoreFolder = child.isDirectory()
                        && child.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME);
                if (isStoreFolder) {
                    return fs.getUri().resolve(child.getPath().getParent().toUri());
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Adds the files found at each location of the given list to the job classpath.
 *
 * <p>
 *   The list is split with {@code SPLITTER}; each resulting location is listed (non-recursively) and every entry
 *   that is not a directory is registered through {@code DistributedCache.addFileToClassPath}. The registered path
 *   is built from the location string and the entry's file name.
 * </p>
 *
 * @param hdfsJarFileList  the list of locations to scan, in the form understood by {@code SPLITTER}.
 * @param conf             the {@link Configuration} the classpath entries are added to.
 *
 * @throws IOException if listing a location or registering a file fails.
 */
private void addHdfsJars(String hdfsJarFileList, Configuration conf) throws IOException {
    for (String jarLocation : SPLITTER.split(hdfsJarFileList)) {
        for (FileStatus entry : this.fs.listStatus(new Path(jarLocation))) {
            // Sub-directories are not descended into.
            if (entry.isDirectory()) {
                continue;
            }
            Path classpathEntry = new Path(jarLocation, entry.getPath().getName());
            LOG.info(String.format("Adding %s to classpath", classpathEntry));
            DistributedCache.addFileToClassPath(classpathEntry, conf, this.fs);
        }
    }
}

From source file:gobblin.runtime.spec_store.FSSpecStore.java

License:Apache License

/**
 * Walks the given directory recursively and adds the {@link Spec} read from every non-directory entry to
 * {@code specs}.
 *
 * @param directory  the directory to scan.
 * @param specs      the collection that receives the specs that were read.
 *
 * @throws IOException if listing a directory or reading a spec file fails.
 */
private void getSpecs(Path directory, Collection<Spec> specs) throws IOException {
    for (FileStatus entry : fs.listStatus(directory)) {
        Path entryPath = entry.getPath();
        if (!entry.isDirectory()) {
            specs.add(readSpecFromFile(entryPath));
            continue;
        }
        // Descend into the sub-directory.
        getSpecs(entryPath, specs);
    }
}

From source file:gobblin.source.extractor.extract.google.GoogleDriveFsHelper.java

License:Apache License

/**
 * List files under folder ID recursively. Folder won't be included in the result. If there's no files under folder ID, it returns empty list.
 * If folder ID is not defined, it will provide files under root directory.
 * {@inheritDoc}/*from ww  w .j  a  va2  s  . c  o m*/
 * @see gobblin.source.extractor.filebased.FileBasedHelper#ls(java.lang.String)
 */
@Override
public List<String> ls(String folderId) throws FileBasedHelperException {
    List<String> result = new ArrayList<>();
    if (StringUtils.isEmpty(folderId)) {
        folderId = "/";
    }

    Path p = new Path(folderId);
    FileStatus[] statusList = null;
    try {
        statusList = fileSystem.listStatus(p);
    } catch (FileNotFoundException e) {
        return result;
    } catch (IOException e) {
        throw new FileBasedHelperException("Falied to list status on path " + p + ", folderID: " + folderId, e);
    }

    for (FileStatus status : statusList) {
        if (status.isDirectory()) {
            result.addAll(ls(GoogleDriveFileSystem.toFileId(status.getPath())));
        } else {
            result.add(GoogleDriveFileSystem.toFileId(status.getPath()));
        }
    }
    return result;
}

From source file:gobblin.source.extractor.hadoop.HadoopFsHelper.java

License:Apache License

/**
 * Recursively lists the files under {@code p} and appends their paths, as strings, to {@code results}.
 * Directories themselves are never added.
 *
 * <p>
 *   If {@code p} is not a directory, it is added once (using {@code p.toString()}) and nothing else is listed.
 *   Listing a plain file would report that same file again and produce a duplicate entry.
 * </p>
 *
 * @param p        the file or directory to start from.
 * @param results  the list that receives the paths of the files found.
 *
 * @throws IOException if the status of {@code p} cannot be read or a directory cannot be listed.
 */
public void lsr(Path p, List<String> results) throws IOException {
    if (!this.fs.getFileStatus(p).isDirectory()) {
        // A plain file has no children; return so the listing below does not add it a second time.
        results.add(p.toString());
        return;
    }
    Path qualifiedPath = this.fs.makeQualified(p);
    for (FileStatus status : this.fs.listStatus(p)) {
        if (status.isDirectory()) {
            // Fix for hadoop issue: https://issues.apache.org/jira/browse/HADOOP-12169
            // Skip an entry that is the directory being listed, otherwise the recursion would never end.
            if (!qualifiedPath.equals(status.getPath())) {
                lsr(status.getPath(), results);
            }
        } else {
            results.add(status.getPath().toString());
        }
    }
}

From source file:gobblin.source.RegexBasedPartitionedRetriever.java

License:Apache License

/**
 * Lists the directories directly under {@code sourceDir} whose watermark is greater than {@code minWatermark}.
 *
 * <p>
 *   Entries that are not directories are skipped. For each directory the watermark is derived from the directory
 *   name; a directory for which that raises an {@link IllegalArgumentException} is logged and skipped. The result
 *   is sorted with {@link Collections#sort(List)} before it is returned.
 * </p>
 *
 * @param fs            the {@link FileSystem} used to list {@code sourceDir}.
 * @param minWatermark  directories whose watermark is not greater than this value are ignored.
 *
 * @return the sorted list of {@link FileInfo}s for the accepted directories.
 *
 * @throws IOException if {@code sourceDir} cannot be listed.
 */
private List<FileInfo> getOuterDirectories(FileSystem fs, long minWatermark) throws IOException {
    LOGGER.debug("Listing contents of {}", sourceDir);

    List<FileInfo> outerDirectories = new ArrayList<>();

    for (FileStatus candidate : fs.listStatus(sourceDir)) {
        Path candidatePath = candidate.getPath();
        if (!candidate.isDirectory()) {
            LOGGER.debug("Skipping non-directory {}", candidatePath.toUri());
            continue;
        }

        try {
            long watermark = getWatermarkFromString(extractWatermarkFromDirectory(candidatePath.getName()));
            if (watermark <= minWatermark) {
                LOGGER.info("Ignoring directory {} - watermark {} is less than minWatermark {}", candidatePath,
                        watermark, minWatermark);
                continue;
            }
            LOGGER.info("Processing directory {} with watermark {}", candidatePath, watermark);
            outerDirectories.add(new FileInfo(candidatePath.toString(), 0, watermark));
        } catch (IllegalArgumentException e) {
            LOGGER.info("Directory {} ({}) does not match pattern {}; skipping", candidatePath.getName(),
                    candidatePath, this.pattern.toString());
        }
    }

    Collections.sort(outerDirectories);
    return outerDirectories;
}