Example usage for org.apache.hadoop.fs PathFilter accept

Introduction

On this page you can find example usages of the accept method of org.apache.hadoop.fs.PathFilter.

Prototype

boolean accept(Path path);

Source Link

Document

Tests whether or not the specified abstract pathname should be included in a pathname list.

Usage

From source file:de.zib.sfs.StatisticsFileSystem.java

License:BSD License

/**
 * Lists the statuses of the given paths on the wrapped file system and, for every
 * input path that {@code unwrapPath} reports as unwrapped, rewrites the returned
 * status paths via {@code wrapPath} and {@code setAuthority} using the authority
 * of that input path.
 *
 * <p>Each input path is listed on its own so that every returned status can be
 * matched to the input path it came from. A single input path may yield any
 * number of statuses, so the result array must not be indexed in lockstep with
 * the input array.
 *
 * @param path   the paths to list
 * @param filter the user-supplied filter; it is invoked with the result of
 *               {@code unwrapPath} applied to each candidate path
 * @return the statuses of all accepted entries, in input-path order
 * @throws FileNotFoundException if thrown by the wrapped file system
 * @throws IOException           if thrown by the wrapped file system
 */
@Override
public FileStatus[] listStatus(Path[] path, PathFilter filter) throws FileNotFoundException, IOException {
    UnwrappedPath[] unwrappedPaths = new UnwrappedPath[path.length];
    for (int i = 0; i < path.length; ++i) {
        unwrappedPaths[i] = unwrapPath(path[i]);
    }

    PathFilter wrappedFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            return filter.accept(unwrapPath(p));
        }
    };

    java.util.List<FileStatus> result = new java.util.ArrayList<FileStatus>();
    for (int i = 0; i < path.length; ++i) {
        FileStatus[] fileStatuses = this.wrappedFS.listStatus(unwrappedPaths[i], wrappedFilter);
        boolean rewrap = unwrappedPaths[i].isUnwrapped();
        String authority = path[i].toUri().getAuthority();
        for (FileStatus fileStatus : fileStatuses) {
            if (rewrap) {
                fileStatus.setPath(setAuthority(wrapPath(fileStatus.getPath()), authority));
            }
            result.add(fileStatus);
        }
    }
    return result.toArray(new FileStatus[result.size()]);
}

From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java

License:Apache License

/**
 * Builds the list of copy entities needed to replicate this dataset from the
 * copy route's source end point to its destination end point.
 *
 * <p>The result may contain {@code CopyableFile}s for source files that need
 * copying, a {@code PrePublishStep} wrapping a delete step for target paths
 * that must be removed, and a {@code PostPublishStep} that generates the
 * watermark metadata when the watermark file itself is not among the source
 * files.
 *
 * @param targetFs          not read by this method; the target file system is
 *                          obtained from the destination end point's URI instead
 * @param copyConfiguration passed through to each {@code CopyableFile} builder
 * @return the copy entities; empty when either end point is not a
 *         {@code HadoopFsEndPoint} or when the watermark comparison shows
 *         nothing needs copying
 * @throws IOException if obtaining a file system or listing files fails
 */
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
        CopyConfiguration copyConfiguration) throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    // Only Hadoop file system end points are handled; anything else yields an empty result.
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }// w ww.j  a  v  a  2 s.c  o  m

    // Nothing to copy when the destination has a watermark and the source either has
    // none or has one that compares less than or equal to the destination's.
    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    // The same path filter is applied to both the source and the target listings.
    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    // Accepted target files, keyed by their path with scheme and authority stripped.
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();

    // Set when the watermark file itself is one of the source files being considered.
    boolean watermarkMetadataCopied = false;

    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        // NOTE(review): copyToFileMap keys have scheme/authority stripped, while newPath is
        // built from copyTo.getDatasetPath() as-is; the lookups below only match if that
        // dataset path carries no scheme/authority either - confirm.
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip copy same file: the target entry has the same length and a strictly
        // newer modification time than the source file
        if (copyToFileMap.containsKey(newPath)
                && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // need to remove those files in the target File System
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }

            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());

        }

        // clean up already checked paths; whatever remains in the map afterwards
        // was not matched by any source file
        copyToFileMap.remove(newPath);
    }

    // delete the paths on target directory if NOT exists on source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths,
                this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file, unless it is already among the source files
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles
                .add(new PostPublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(),
                        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(),
                                copyTo.getDatasetPath(), copyFrom.getWatermark().get()),
                        1));
    }

    return copyableFiles;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Recursively collects into {@code files} every non-directory entry under
 * {@code fileStatus} whose path is accepted by {@code fileFilter}.
 *
 * <p>Every child of a directory is handed to the recursive call, which then
 * either descends into it (directory) or applies {@code fileFilter} to it
 * (file). An earlier version tested the parent's {@code isDirectory()} inside
 * the loop; since that is always true there, its alternative branch was dead
 * code and would have added the parent directory rather than the child. The
 * dead branch has been removed and the effective behavior is unchanged.
 *
 * @param fs                       file system used to list directory contents
 * @param files                    accumulator that receives the accepted files
 * @param fileStatus               the entry to inspect
 * @param fileFilter               filter applied to every file, and to directory
 *                                 listings when {@code applyFilterToDirectories} is set
 * @param applyFilterToDirectories when {@code true}, directory listings are filtered
 *                                 with {@code fileFilter}; otherwise with
 *                                 {@code NO_OP_PATH_FILTER}
 * @return the {@code files} accumulator
 * @throws FileNotFoundException if thrown while listing a directory
 * @throws IOException           if thrown while listing a directory
 */
private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories)
        throws FileNotFoundException, IOException {
    if (fileStatus.isDirectory()) {
        for (FileStatus status : fs.listStatus(fileStatus.getPath(),
                applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER)) {
            listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories);
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Recursively collects the "leaf" entries under {@code fileStatus} into
 * {@code files}: directories whose listing is empty, and non-directory
 * entries accepted by {@code fileFilter}.
 *
 * @param fs         file system used to list directory contents
 * @param files      accumulator that receives the collected entries
 * @param fileStatus the entry to inspect
 * @param fileFilter filter applied to non-directory entries only
 * @return the {@code files} accumulator
 * @throws IOException if listing a directory fails
 */
private static List<FileStatus> listMostNestedPathRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) throws IOException {
    // Plain entry: keep it only if the filter accepts it.
    if (!fileStatus.isDirectory()) {
        if (fileFilter.accept(fileStatus.getPath())) {
            files.add(fileStatus);
        }
        return files;
    }

    FileStatus[] children = fs.listStatus(fileStatus.getPath());

    // A directory with an empty listing is recorded itself, without consulting the filter.
    if (ArrayUtils.isEmpty(children)) {
        files.add(fileStatus);
        return files;
    }

    for (FileStatus child : children) {
        listMostNestedPathRecursivelyHelper(fs, files, child, fileFilter);
    }
    return files;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Recursively collects into {@code files} every entry (file or directory)
 * under and including {@code fileStatus} whose path is accepted by
 * {@code fileFilter}.
 *
 * <p>Listing is best-effort: if a directory cannot be listed, the failure is
 * logged together with its cause and that directory's contents are skipped.
 *
 * @param fs         file system used to list directory contents
 * @param files      accumulator that receives the accepted entries
 * @param fileStatus the entry to inspect
 * @param fileFilter filter applied to every visited path
 * @return the {@code files} accumulator
 */
private static List<FileStatus> listPathsRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) {
    if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    if (fileStatus.isDirectory()) {
        try {
            for (FileStatus status : fs.listStatus(fileStatus.getPath())) {
                listPathsRecursivelyHelper(fs, files, status, fileFilter);
            }
        } catch (IOException ioe) {
            // Pass the exception along so the reason for the failure is not lost.
            LOG.error("Could not list contents of path " + fileStatus.getPath(), ioe);
        }
    }
    return files;
}

From source file:gobblin.util.filters.AndPathFilter.java

License:Apache License

/**
 * Accepts a path only if every filter in {@code pathFilters} accepts it.
 * Evaluation stops at the first rejecting filter.
 *
 * @param path the path to test
 * @return {@code false} as soon as one filter rejects the path, {@code true} otherwise
 */
@Override
public boolean accept(Path path) {
    for (PathFilter candidate : this.pathFilters) {
        if (candidate.accept(path)) {
            continue;
        }
        return false;
    }
    return true;
}

From source file:io.confluent.connect.hdfs.FileUtils.java

License:Apache License

/**
 * Recursively gathers the statuses of all non-directory entries below
 * {@code path} that {@code filter} accepts.
 *
 * @param storage storage used for the existence check and directory listings
 * @param path    root of the traversal
 * @param filter  filter applied to non-directory entries only
 * @return the accepted entries; empty when {@code path} does not exist
 * @throws IOException if the storage reports a failure
 */
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
        throws IOException {
    ArrayList<FileStatus> collected = new ArrayList<>();
    if (!storage.exists(path.toString())) {
        return collected;
    }

    for (FileStatus entry : storage.listStatus(path.toString())) {
        if (entry.isDirectory()) {
            // Directories are never tested against the filter; only descended into.
            collected.addAll(traverseImpl(storage, entry.getPath(), filter));
            continue;
        }
        if (filter.accept(entry.getPath())) {
            collected.add(entry);
        }
    }
    return collected;
}

From source file:io.confluent.connect.hdfs.utils.MemoryStorage.java

License:Apache License

/**
 * Returns a status for every stored key that starts with {@code path} and
 * whose path is accepted by {@code filter}.
 *
 * <p>If a {@code listStatusFailure} is armed, it is reset to {@code noFailure}
 * and an {@link IOException} is thrown instead of listing.
 *
 * @param path   key prefix to match
 * @param filter filter applied to each matching key's path
 * @return the statuses of all matching entries
 * @throws IOException when the injected failure is triggered
 */
@Override
public FileStatus[] listStatus(String path, PathFilter filter) throws IOException {
    if (failure == Failure.listStatusFailure) {
        // The injected failure fires once and is then cleared.
        failure = Failure.noFailure;
        throw new IOException("listStatus failed.");
    }

    List<FileStatus> matches = new ArrayList<>();
    for (String storedKey : data.keySet()) {
        if (!storedKey.startsWith(path)) {
            continue;
        }
        if (!filter.accept(new Path(storedKey))) {
            continue;
        }
        // The stored collection's size is reported as the length; the entry is flagged as a non-directory.
        matches.add(new FileStatus(data.get(storedKey).size(), false, 1, 0, 0, 0, null, null, null,
                new Path(storedKey)));
    }
    return matches.toArray(new FileStatus[matches.size()]);
}

From source file:kogiri.mapreduce.common.kmermatch.KmerMatchInputFormat.java

License:Open Source License

/**
 * Resolves the job's input paths to file statuses, keeping only the paths
 * accepted by the combined input filter.
 *
 * <p>The combined filter is a {@code MultiPathFilter} built from the job's
 * own input path filter (if one is set) followed by a
 * {@code KmerIndexIndexPathFilter}.
 *
 * @param job the job context supplying input paths, credentials and configuration
 * @return the statuses of the accepted input paths
 * @throws IOException if no input paths are configured or a file system call fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] inputPaths = getInputPaths(job);
    if (inputPaths.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Obtain tokens for every file system the input paths refer to.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), inputPaths, job.getConfiguration());

    // Chain the user-provided filter (when present) with the kmer index-index filter.
    List<PathFilter> filterChain = new ArrayList<PathFilter>();
    PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    filterChain.add(new KmerIndexIndexPathFilter());
    PathFilter combinedFilter = new MultiPathFilter(filterChain);

    List<FileStatus> accepted = new ArrayList<FileStatus>();
    for (Path inputPath : inputPaths) {
        if (!combinedFilter.accept(inputPath)) {
            continue;
        }
        FileSystem fs = inputPath.getFileSystem(job.getConfiguration());
        accepted.add(fs.getFileStatus(inputPath));
    }

    LOG.info("Total input paths to process : " + accepted.size());
    return accepted;
}

From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexInputFormat.java

License:Open Source License

/**
 * Resolves the job's input paths to file statuses, keeping only the paths
 * accepted by the combined input filter.
 *
 * <p>The combined filter is a {@code MultiPathFilter} built from the job's
 * own input path filter (if one is set) followed by a
 * {@code KmerIndexPartPathFilter}.
 *
 * @param job the job context supplying input paths, credentials and configuration
 * @return the statuses of the accepted input paths
 * @throws IOException if no input paths are configured or a file system call fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] inputPaths = getInputPaths(job);
    if (inputPaths.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Obtain tokens for every file system the input paths refer to.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), inputPaths, job.getConfiguration());

    // Chain the user-provided filter (when present) with the kmer index-part filter.
    List<PathFilter> filterChain = new ArrayList<PathFilter>();
    PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    filterChain.add(new KmerIndexPartPathFilter());
    PathFilter combinedFilter = new MultiPathFilter(filterChain);

    List<FileStatus> accepted = new ArrayList<FileStatus>();
    for (Path inputPath : inputPaths) {
        if (!combinedFilter.accept(inputPath)) {
            continue;
        }
        FileSystem fs = inputPath.getFileSystem(job.getConfiguration());
        accepted.add(fs.getFileStatus(inputPath));
    }

    LOG.info("Total input paths to process : " + accepted.size());
    return accepted;
}