List of usage examples for org.apache.hadoop.fs FileSystem globStatus
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException
From source file:org.apache.tajo.storage.AbstractStorageManager.java
License:Apache License
/** * List input directories.//from ww w .j ava 2 s . c om * Subclasses may override to, e.g., select only files matching a regular * expression. * * @return array of FileStatus objects * @throws IOException if zero items. */ protected List<FileStatus> listStatus(Path... dirs) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(conf); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDirectory()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java
License:Apache License
/**
 * Recursively collects the paths of all files located at or beneath {@code path}.
 *
 * <p>If {@code path} is a file it is returned as the only element. Otherwise its
 * children are enumerated with {@code globStatus} and each child is visited recursively.
 *
 * @param fs     file system to query
 * @param path   starting location; {@code null} selects the file system's home directory
 * @param filter filter handed to {@code globStatus} at every level
 * @return paths of all files reached from {@code path}; empty if none were found
 * @throws IOException if a file system call fails
 */
public static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path) {
        path = fs.getHomeDirectory();
    }

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path)) {
        paths.add(path);
        return paths;
    }

    // The root needs special care: naive concatenation would yield "//*", which Path
    // would parse as an authority. The previous code substituted new Path("") for the
    // root, but Hadoop's Path constructor rejects empty strings with an
    // IllegalArgumentException, so the root could never be listed. Build the child
    // pattern directly from FORWARD_ASTERISK instead and leave the root path intact.
    final String location = path.toString();
    final Path childPattern = location.equals(FORWARD_SLASH)
            ? new Path(FORWARD_ASTERISK)
            : new Path(location + FORWARD_ASTERISK);

    for (final FileStatus status : fs.globStatus(childPattern, filter)) {
        paths.addAll(getAllFilePaths(fs, status.getPath(), filter));
    }
    return paths;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java
License:Apache License
/**
 * Walks the tree rooted at {@code path} and returns the path of every file found.
 *
 * <p>A {@code path} that is itself a file yields a single-element list. For anything
 * else the direct children are globbed and each one is processed recursively.
 *
 * @param fs     file system to query
 * @param path   where to start; when {@code null} the file system's home directory is used
 * @param filter filter passed to {@code globStatus} for every directory level
 * @return all file paths discovered; empty when nothing matched
 * @throws IOException if a file system call fails
 */
private static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path) {
        path = fs.getHomeDirectory();
    }

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path)) {
        paths.add(path);
    } else {
        // Previously the root was replaced by new Path("") so that appending
        // FORWARD_ASTERISK would not produce "//*". Hadoop's Path constructor throws
        // IllegalArgumentException for an empty string, so that branch always failed.
        // Keep the root path as-is and use FORWARD_ASTERISK alone as its child pattern.
        final boolean isRoot = path.toString().equals(FORWARD_SLASH);
        final Path childPattern = isRoot
                ? new Path(FORWARD_ASTERISK)
                : new Path(path + FORWARD_ASTERISK);

        for (final FileStatus status : fs.globStatus(childPattern, filter)) {
            final Path next = status.getPath();
            paths.addAll(getAllFilePaths(fs, next, filter));
        }
    }
    return paths;
}
From source file:org.embulk.input.parquet_hadoop.ParquetHadoopInputPlugin.java
License:Apache License
/**
 * Resolves {@code rootPath} via {@code globStatus} and flattens the matches into a list.
 *
 * <p>Matches that are directories are handed to {@code listRecursive} and its results
 * are appended; all other matches are appended directly. The glob is filtered with
 * {@code HiddenFileFilter.INSTANCE}.
 *
 * @param fs       file system to query
 * @param rootPath path or glob pattern to resolve
 * @return the collected statuses; empty when {@code globStatus} returns {@code null}
 * @throws IOException if a file system call fails
 */
private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException {
    final List<FileStatus> collected = Lists.newArrayList();
    final FileStatus[] globbed = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);

    if (globbed != null) {
        for (final FileStatus match : globbed) {
            if (!match.isDirectory()) {
                collected.add(match);
            } else {
                collected.addAll(listRecursive(fs, match));
            }
        }
    }
    return collected;
}
From source file:org.icgc.dcc.release.core.hadoop.FileGlobInputStream.java
License:Open Source License
private static Collection<Path> getPaths(FileSystem fileSystem, Path pathPattern) throws IOException, FileNotFoundException { FileStatus[] matches = fileSystem.globStatus(pathPattern, HIDDEN_PATH_FILTER); val paths = Lists.<Path>newArrayList(); for (val match : matches) { if (fileSystem.isDirectory(match.getPath())) { FileStatus[] partFiles = fileSystem.listStatus(match.getPath(), HIDDEN_PATH_FILTER); for (val partFile : partFiles) { paths.add(partFile.getPath()); }// ww w. j a v a2s . co m } else { paths.add(match.getPath()); } } return paths; }
From source file:org.mrgeo.hdfs.input.image.HdfsMrsImagePyramidInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); HdfsMrsImageDataProvider dp = new HdfsMrsImageDataProvider(job.getConfiguration(), input, null); String inputWithZoom = getZoomName(dp, inputZoom); // We are going to read all of the input dirs Path[] dirs = new Path[] { new Path(inputWithZoom) }; // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter);/* ww w . j a v a 2 s . c o m*/ PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { findInputs(globStat, fs, inputFilter, result); } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }