Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException 

Source Link

Document

Returns an array of FileStatus objects whose path names match pathPattern and are accepted by the user-supplied path filter.

Usage

From source file:org.apache.tajo.storage.AbstractStorageManager.java

License:Apache License

/**
 * Expands the given input paths into the list of entries to read.
 *
 * <p>Each path is globbed with the hidden-file filter. A match that is a
 * directory contributes its filtered direct children; any other match is
 * added as-is. Subclasses may override to, e.g., select only files matching
 * a regular expression.
 *
 * @param dirs the input paths (glob patterns are accepted)
 * @return the collected FileStatus objects
 * @throws IOException if no paths are given; an InvalidInputException carrying
 *         every per-path error is thrown if any path does not exist or
 *         matches nothing
 */
protected List<FileStatus> listStatus(Path... dirs) throws IOException {
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    List<FileStatus> result = new ArrayList<FileStatus>();
    // Problems are accumulated so that all bad paths are reported together.
    List<IOException> errors = new ArrayList<IOException>();

    // Only the hidden-file filter is installed here, wrapped in a MultiPathFilter.
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path dir : dirs) {
        FileSystem fileSystem = dir.getFileSystem(conf);
        FileStatus[] globbed = fileSystem.globStatus(dir, inputFilter);

        if (globbed == null) {
            errors.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            errors.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }

        for (FileStatus candidate : globbed) {
            if (!candidate.isDirectory()) {
                result.add(candidate);
                continue;
            }
            // Directories are expanded one level only.
            for (FileStatus child : fileSystem.listStatus(candidate.getPath(), inputFilter)) {
                result.add(child);
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java

License:Apache License

/**
 * Recursively collects every file found at or below {@code path}.
 *
 * <p>A {@code null} path is replaced by {@code fs.getHomeDirectory()}. If the
 * resulting path is a file it is returned as the only element; otherwise the
 * pattern formed by appending FORWARD_ASTERISK to the path is globbed with
 * {@code filter} and every match is visited recursively.
 *
 * <p>NOTE(review): a path equal to FORWARD_SLASH is swapped for
 * {@code new Path("")}; Hadoop's Path constructor appears to reject empty
 * strings, so starting from the root may fail here -- confirm.
 *
 * @param fs     file system to query
 * @param path   starting file or directory; may be {@code null}
 * @param filter filter handed to every {@code globStatus} call
 * @return the paths for which {@code fs.isFile} returned true
 * @throws IOException if a file system call fails
 */
public static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    Path start = path;
    if (start == null) {
        start = fs.getHomeDirectory();
    }
    if (start.toString().equals(FORWARD_SLASH)) {
        start = new Path("");
    }

    final List<Path> collected = new ArrayList<Path>();
    if (fs.isFile(start)) {
        collected.add(start);
        return collected;
    }

    // Not a file: descend into whatever the child pattern matches.
    final Path childPattern = new Path(start + FORWARD_ASTERISK);
    for (final FileStatus child : fs.globStatus(childPattern, filter)) {
        collected.addAll(getAllFilePaths(fs, child.getPath(), filter));
    }
    return collected;
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java

License:Apache License

/**
 * Walks the tree rooted at {@code path} and gathers the files it contains.
 *
 * <p>When {@code path} is {@code null} the walk starts at
 * {@code fs.getHomeDirectory()}. A file yields a single-element list; anything
 * else is expanded by globbing the path with FORWARD_ASTERISK appended, using
 * {@code filter}, and recursing into each match.
 *
 * <p>NOTE(review): a path equal to FORWARD_SLASH is replaced with
 * {@code new Path("")}; Hadoop's Path constructor appears to reject empty
 * strings, so a root-level walk may fail -- confirm.
 *
 * @param fs     file system to query
 * @param path   starting file or directory; may be {@code null}
 * @param filter filter handed to every {@code globStatus} call
 * @return the paths for which {@code fs.isFile} returned true
 * @throws IOException if a file system call fails
 */
private static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    Path root = (null == path) ? fs.getHomeDirectory() : path;
    if (root.toString().equals(FORWARD_SLASH)) {
        root = new Path("");
    }

    final List<Path> found = new ArrayList<Path>();
    if (!fs.isFile(root)) {
        // Expand the children and flatten each subtree into the result.
        final FileStatus[] children = fs.globStatus(new Path(root + FORWARD_ASTERISK), filter);
        for (final FileStatus child : children) {
            found.addAll(getAllFilePaths(fs, child.getPath(), filter));
        }
    } else {
        found.add(root);
    }
    return found;
}

From source file:org.embulk.input.parquet_hadoop.ParquetHadoopInputPlugin.java

License:Apache License

/**
 * Resolves {@code rootPath} (which may be a glob) into file statuses.
 *
 * <p>Matches are filtered with {@code HiddenFileFilter.INSTANCE}. A matched
 * directory is expanded through {@code listRecursive}; any other match is
 * added directly. A {@code null} glob result produces an empty list.
 *
 * @param fs       file system to query
 * @param rootPath path or glob pattern to expand
 * @return the collected statuses, possibly empty
 * @throws IOException if a file system call fails
 */
private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException {
    List<FileStatus> collected = Lists.newArrayList();

    FileStatus[] globbed = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);
    if (globbed != null) {
        for (FileStatus status : globbed) {
            if (!status.isDirectory()) {
                collected.add(status);
                continue;
            }
            collected.addAll(listRecursive(fs, status));
        }
    }

    return collected;
}

From source file:org.icgc.dcc.release.core.hadoop.FileGlobInputStream.java

License:Open Source License

/**
 * Resolves {@code pathPattern} into the concrete paths to read.
 *
 * <p>Matches are filtered with HIDDEN_PATH_FILTER. A matched directory
 * contributes its filtered direct children; any other match is added as-is.
 *
 * @param fileSystem  file system to query
 * @param pathPattern path or glob pattern to expand
 * @return the resolved paths, possibly empty when a glob matches nothing
 * @throws FileNotFoundException if {@code globStatus} returns {@code null},
 *         which Hadoop documents for a non-glob path that does not exist
 * @throws IOException if a file system call fails
 */
private static Collection<Path> getPaths(FileSystem fileSystem, Path pathPattern)
        throws IOException, FileNotFoundException {
    FileStatus[] matches = fileSystem.globStatus(pathPattern, HIDDEN_PATH_FILTER);
    if (matches == null) {
        // Without this guard the loop below fails with an uninformative
        // NullPointerException when the input path is missing.
        throw new FileNotFoundException("Input path does not exist: " + pathPattern);
    }

    val paths = Lists.<Path>newArrayList();
    for (val match : matches) {
        if (fileSystem.isDirectory(match.getPath())) {
            // Expand a directory one level into its visible entries.
            FileStatus[] partFiles = fileSystem.listStatus(match.getPath(), HIDDEN_PATH_FILTER);
            for (val partFile : partFiles) {
                paths.add(partFile.getPath());
            }
        } else {
            paths.add(match.getPath());
        }
    }

    return paths;
}

From source file:org.mrgeo.hdfs.input.image.HdfsMrsImagePyramidInputFormat.java

License:Apache License

/**
 * Lists the inputs for the job from the zoom-level path of the image.
 *
 * <p>The path is obtained from {@code getZoomName} using a data provider built
 * from the job configuration and the {@code input} / {@code inputZoom} values
 * visible to this method. It is globbed with the hidden-file filter combined
 * with the job's input path filter (when one is set), and each match is handed
 * to {@code findInputs}, which fills the result list.
 *
 * @param job the job context supplying configuration and credentials
 * @return the collected FileStatus objects
 * @throws IOException an InvalidInputException carrying every per-path error
 *         is thrown if a path does not exist or matches nothing
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    HdfsMrsImageDataProvider dp = new HdfsMrsImageDataProvider(job.getConfiguration(), input, null);
    String inputWithZoom = getZoomName(dp, inputZoom);

    // The single directory to read: the image at the requested zoom level.
    Path[] dirs = new Path[] { new Path(inputWithZoom) };

    // Obtain tokens for every FileSystem that will be touched.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Combine the hidden-file filter with the job's own filter, if present.
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = new ArrayList<FileStatus>();
    // Problems are accumulated so that all bad paths are reported together.
    List<IOException> errors = new ArrayList<IOException>();

    for (Path dir : dirs) {
        FileSystem fileSystem = dir.getFileSystem(job.getConfiguration());
        FileStatus[] globbed = fileSystem.globStatus(dir, inputFilter);

        if (globbed == null) {
            errors.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            errors.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }
        for (FileStatus matched : globbed) {
            findInputs(matched, fileSystem, inputFilter, result);
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}