List of usage examples for org.apache.hadoop.fs.FileStatus#isDirectory()
public boolean isDirectory()
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java
License:Apache License
/** * Retrieves all the {@link ConfigKeyPath}s that are imported by the given {@link ConfigKeyPath}. This method does this * by reading the {@link #INCLUDES_CONF_FILE_NAME} file associated with the dataset specified by the given * {@link ConfigKeyPath}. If the {@link Path} described by the {@link ConfigKeyPath} does not exist, then an empty * {@link List} is returned./*ww w. j a v a 2 s .c o m*/ * * @param configKey the config key path whose tags are needed * @param version the configuration version in the configuration store. * * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset * specified by the configKey. * * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}. */ public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version, Optional<Config> runtimeConfig) throws VersionDoesNotExistException { Preconditions.checkNotNull(configKey, "configKey cannot be null!"); Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!"); List<ConfigKeyPath> configKeyPaths = new ArrayList<>(); Path datasetDir = getDatasetDirForKey(configKey, version); Path includesFile = new Path(datasetDir, INCLUDES_CONF_FILE_NAME); try { if (!this.fs.exists(includesFile)) { return configKeyPaths; } FileStatus includesFileStatus = this.fs.getFileStatus(includesFile); if (!includesFileStatus.isDirectory()) { try (InputStream includesConfInStream = this.fs.open(includesFileStatus.getPath())) { /* * The includes returned are used to build a fallback chain. * With the natural order, if a key found in the first include it is not be overriden by the next include. * By reversing the list, the Typesafe fallbacks are constructed bottom up. 
*/ configKeyPaths.addAll(Lists.newArrayList(Iterables.transform( Lists.reverse(resolveIncludesList( IOUtils.readLines(includesConfInStream, Charsets.UTF_8), runtimeConfig)), new IncludesToConfigKey()))); } } } catch (IOException e) { throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey), e); } return configKeyPaths; }
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java
License:Apache License
/** * Retrieves the {@link Config} for the given {@link ConfigKeyPath} by reading the {@link #MAIN_CONF_FILE_NAME} * associated with the dataset specified by the given {@link ConfigKeyPath}. If the {@link Path} described by the * {@link ConfigKeyPath} does not exist then an empty {@link Config} is returned. * * @param configKey the config key path whose properties are needed. * @param version the configuration version in the configuration store. * * @return a {@link Config} for the given configKey. * * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}. */// ww w. j a v a 2s .co m @Override public Config getOwnConfig(ConfigKeyPath configKey, String version) throws VersionDoesNotExistException { Preconditions.checkNotNull(configKey, "configKey cannot be null!"); Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!"); Path datasetDir = getDatasetDirForKey(configKey, version); Path mainConfFile = new Path(datasetDir, MAIN_CONF_FILE_NAME); try { if (!this.fs.exists(mainConfFile)) { return ConfigFactory.empty(); } FileStatus configFileStatus = this.fs.getFileStatus(mainConfFile); if (!configFileStatus.isDirectory()) { try (InputStream mainConfInputStream = this.fs.open(configFileStatus.getPath())) { return ConfigFactory.parseReader(new InputStreamReader(mainConfInputStream, Charsets.UTF_8)); } } return ConfigFactory.empty(); } catch (IOException e) { throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey), e); } }
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java
License:Apache License
/** * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory on HDFS. It does * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory * contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It the assumes this {@link Path} is the root * directory./*from w w w. ja v a 2 s. c o m*/ * * <p> * If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does * not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If * the default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME} then a * {@link ConfigStoreCreationException} is thrown. * </p> */ private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException { if (Strings.isNullOrEmpty(configKey.getAuthority())) { if (getDefaultStoreURILazy() != null) { return getDefaultStoreURILazy(); } else if (isAuthorityRequired()) { throw new ConfigStoreCreationException(configKey, "No default store has been configured."); } } Path path = new Path(configKey.getPath()); while (path != null) { try { // the abs URI may point to an unexist path for // 1. phantom node // 2. as URI did not specify the version if (fs.exists(path)) { for (FileStatus fileStatus : fs.listStatus(path)) { if (fileStatus.isDirectory() && fileStatus.getPath().getName() .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) { return fs.getUri().resolve(fileStatus.getPath().getParent().toUri()); } } } } catch (IOException e) { throw new ConfigStoreCreationException(configKey, e); } path = path.getParent(); } throw new ConfigStoreCreationException(configKey, "Cannot find the store root!"); }
From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStore.java
License:Apache License
/**
 * Retrieves all the {@link ConfigKeyPath}s that are imported by the given {@link ConfigKeyPath}. This method does this
 * by reading the {@link #INCLUDES_CONF_FILE_NAME} file associated with the dataset specified by the given
 * {@link ConfigKeyPath}. If that file does not exist, or the path turns out to be a directory, then an empty
 * {@link List} is returned.
 *
 * @param configKey the config key path whose imports are needed
 * @param version the configuration version in the configuration store.
 *
 * @return a {@link List} of {@link ConfigKeyPath}s where each entry is a {@link ConfigKeyPath} imported by the dataset
 * specified by the configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the includes file.
 */
@Override
public List<ConfigKeyPath> getOwnImports(ConfigKeyPath configKey, String version)
    throws VersionDoesNotExistException {
  Preconditions.checkNotNull(configKey, "configKey cannot be null!");
  Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

  List<ConfigKeyPath> configKeyPaths = new ArrayList<>();
  Path datasetDir = getDatasetDirForKey(configKey, version);
  Path includesFile = new Path(datasetDir, INCLUDES_CONF_FILE_NAME);

  try {
    // A single getFileStatus() call replaces the former exists() + getFileStatus() pair: one round trip instead
    // of two, and no window in which the file can vanish between the check and the lookup.
    FileStatus includesFileStatus;
    try {
      includesFileStatus = this.fs.getFileStatus(includesFile);
    } catch (java.io.FileNotFoundException ignored) {
      // No includes file means the dataset imports nothing.
      return configKeyPaths;
    }

    if (includesFileStatus.isDirectory()) {
      return configKeyPaths;
    }

    try (InputStream includesConfInStream = this.fs.open(includesFileStatus.getPath())) {
      /*
       * The includes returned are used to build a fallback chain.
       * With the natural order, a key found in the first include is not overridden by the next include.
       * By reversing the list, the Typesafe fallbacks are constructed bottom up.
       */
      configKeyPaths.addAll(Lists.newArrayList(Iterables.transform(
          Lists.reverse(resolveIncludesList(IOUtils.readLines(includesConfInStream, Charsets.UTF_8))),
          new IncludesToConfigKey())));
    }
  } catch (IOException e) {
    throw new RuntimeException(String.format("Error while getting config for configKey: \"%s\"", configKey), e);
  }

  return configKeyPaths;
}
From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java
License:Apache License
/** * This method determines the physical location of the {@link SimpleHDFSConfigStore} root directory on HDFS. It does * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory * contains the folder {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. It the assumes this {@link Path} is the root * directory.// ww w . j av a 2 s . c o m * * <p> * If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does * not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If * the default root dir does not contain the {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME} then a * {@link ConfigStoreCreationException} is thrown. * </p> */ private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException { if (Strings.isNullOrEmpty(configKey.getAuthority())) { if (!hasDefaultStoreURI()) { throw new ConfigStoreCreationException(configKey, "No default store has been configured."); } return this.defaultStoreURI.get(); } Path path = new Path(configKey.getPath()); while (path != null) { try { // the abs URI may point to an unexist path for // 1. phantom node // 2. as URI did not specify the version if (fs.exists(path)) { for (FileStatus fileStatus : fs.listStatus(path)) { if (fileStatus.isDirectory() && fileStatus.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME)) { return fs.getUri().resolve(fileStatus.getPath().getParent().toUri()); } } } } catch (IOException e) { throw new ConfigStoreCreationException(configKey, e); } path = path.getParent(); } throw new ConfigStoreCreationException(configKey, "Cannot find the store root!"); }
From source file:gobblin.runtime.mapreduce.MRJobLauncher.java
License:Apache License
/**
 * Adds the non-directory entries listed under each location of {@code hdfsJarFileList} to the job classpath.
 *
 * @param hdfsJarFileList the locations to scan; split into individual entries with {@code SPLITTER}
 * @param conf the job {@link Configuration} that receives the classpath entries
 *
 * @throws IOException if listing a location or registering a file fails
 */
private void addHdfsJars(String hdfsJarFileList, Configuration conf) throws IOException {
  for (String jarLocation : SPLITTER.split(hdfsJarFileList)) {
    for (FileStatus entry : this.fs.listStatus(new Path(jarLocation))) {
      if (entry.isDirectory()) {
        // Only the direct, non-directory children of the location are registered.
        continue;
      }
      // NOTE(review): the classpath entry is rebuilt as <location>/<name>, which presumes that
      // each location is a directory - confirm against callers.
      String entryName = entry.getPath().getName();
      Path classpathEntry = new Path(jarLocation, entryName);
      LOG.info(String.format("Adding %s to classpath", classpathEntry));
      DistributedCache.addFileToClassPath(classpathEntry, conf, this.fs);
    }
  }
}
From source file:gobblin.runtime.spec_store.FSSpecStore.java
License:Apache License
/**
 * Walks {@code directory} recursively and appends the {@link Spec} read from every non-directory entry to
 * {@code specs}.
 *
 * @param directory the directory whose entries are visited
 * @param specs the collection that receives the specs that were read
 *
 * @throws IOException if listing a directory or reading a spec file fails
 */
private void getSpecs(Path directory, Collection<Spec> specs) throws IOException {
  for (FileStatus entry : fs.listStatus(directory)) {
    Path entryPath = entry.getPath();
    if (!entry.isDirectory()) {
      specs.add(readSpecFromFile(entryPath));
      continue;
    }
    // Descend into the sub-directory before moving on to the next sibling.
    getSpecs(entryPath, specs);
  }
}
From source file:gobblin.source.extractor.extract.google.GoogleDriveFsHelper.java
License:Apache License
/** * List files under folder ID recursively. Folder won't be included in the result. If there's no files under folder ID, it returns empty list. * If folder ID is not defined, it will provide files under root directory. * {@inheritDoc}/*from ww w .j a va2 s . c o m*/ * @see gobblin.source.extractor.filebased.FileBasedHelper#ls(java.lang.String) */ @Override public List<String> ls(String folderId) throws FileBasedHelperException { List<String> result = new ArrayList<>(); if (StringUtils.isEmpty(folderId)) { folderId = "/"; } Path p = new Path(folderId); FileStatus[] statusList = null; try { statusList = fileSystem.listStatus(p); } catch (FileNotFoundException e) { return result; } catch (IOException e) { throw new FileBasedHelperException("Falied to list status on path " + p + ", folderID: " + folderId, e); } for (FileStatus status : statusList) { if (status.isDirectory()) { result.addAll(ls(GoogleDriveFileSystem.toFileId(status.getPath()))); } else { result.add(GoogleDriveFileSystem.toFileId(status.getPath())); } } return result; }
From source file:gobblin.source.extractor.hadoop.HadoopFsHelper.java
License:Apache License
public void lsr(Path p, List<String> results) throws IOException { if (!this.fs.getFileStatus(p).isDirectory()) { results.add(p.toString());/* www . j av a 2 s.co m*/ } Path qualifiedPath = this.fs.makeQualified(p); for (FileStatus status : this.fs.listStatus(p)) { if (status.isDirectory()) { // Fix for hadoop issue: https://issues.apache.org/jira/browse/HADOOP-12169 if (!qualifiedPath.equals(status.getPath())) { lsr(status.getPath(), results); } } else { results.add(status.getPath().toString()); } } }
From source file:gobblin.source.RegexBasedPartitionedRetriever.java
License:Apache License
/**
 * Lists the direct sub-directories of {@code sourceDir} whose watermark, extracted from the directory name, is
 * greater than {@code minWatermark}.
 *
 * <p>
 *   Non-directory entries are skipped, as are directories for which extracting or parsing the watermark raises an
 *   {@link IllegalArgumentException}. The result is sorted with {@link Collections#sort(List)}.
 * </p>
 *
 * @param fs the {@link FileSystem} used to list {@code sourceDir}
 * @param minWatermark directories whose watermark is not greater than this value are ignored
 *
 * @return the sorted {@link FileInfo}s of the accepted directories
 *
 * @throws IOException if {@code sourceDir} cannot be listed
 */
private List<FileInfo> getOuterDirectories(FileSystem fs, long minWatermark) throws IOException {
  LOGGER.debug("Listing contents of {}", sourceDir);

  List<FileInfo> outerDirectories = new ArrayList<>();
  for (FileStatus entry : fs.listStatus(sourceDir)) {
    if (!entry.isDirectory()) {
      LOGGER.debug("Skipping non-directory {}", entry.getPath().toUri());
      continue;
    }

    try {
      String directoryName = entry.getPath().getName();
      long watermark = getWatermarkFromString(extractWatermarkFromDirectory(directoryName));
      if (watermark <= minWatermark) {
        LOGGER.info("Ignoring directory {} - watermark {} is less than minWatermark {}", entry.getPath(),
            watermark, minWatermark);
      } else {
        LOGGER.info("Processing directory {} with watermark {}", entry.getPath(), watermark);
        outerDirectories.add(new FileInfo(entry.getPath().toString(), 0, watermark));
      }
    } catch (IllegalArgumentException e) {
      // The directory name does not yield a usable watermark; it is logged and left out.
      LOGGER.info("Directory {} ({}) does not match pattern {}; skipping", entry.getPath().getName(),
          entry.getPath(), this.pattern.toString());
    }
  }

  Collections.sort(outerDirectories);
  return outerDirectories;
}