Example usage for org.apache.hadoop.fs PathFilter accept

List of usage examples for org.apache.hadoop.fs PathFilter accept

Introduction

On this page you can find usage examples for the accept method of org.apache.hadoop.fs.PathFilter.

Prototype

boolean accept(Path path);

Source Link

Document

Tests whether or not the specified abstract pathname should be included in a pathname list.

Usage

From source file:de.zib.sfs.StatisticsFileSystem.java

License:BSD License

/**
 * Lists the statuses for every given path on the wrapped file system, applying
 * {@code filter} to the unwrapped form of each candidate path.
 *
 * <p>Each input path is listed individually so that every returned status can be
 * rewritten against the input path that produced it. A single multi-path listing
 * may return any number of statuses per input path, so indexing the input array
 * with the index of a result entry is not valid.
 *
 * @param path   the paths to list
 * @param filter the caller's filter; it is handed unwrapped paths
 * @return the statuses of all accepted entries, in input-path order; statuses that
 *         stem from an input path reported as unwrapped get their path wrapped again
 *         and carry the authority of that input path
 * @throws FileNotFoundException propagated from the wrapped file system
 * @throws IOException           propagated from the wrapped file system
 */
@Override
public FileStatus[] listStatus(Path[] path, PathFilter filter) throws FileNotFoundException, IOException {
    // Unwrap everything up front so a bad path fails before any listing happens.
    UnwrappedPath[] unwrappedPaths = new UnwrappedPath[path.length];
    for (int i = 0; i < path.length; ++i) {
        unwrappedPaths[i] = unwrapPath(path[i]);
    }

    PathFilter wrappedFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            return filter.accept(unwrapPath(p));
        }
    };

    java.util.List<FileStatus> collected = new java.util.ArrayList<FileStatus>();
    for (int i = 0; i < path.length; ++i) {
        FileStatus[] statuses = this.wrappedFS.listStatus(unwrappedPaths[i], wrappedFilter);
        if (unwrappedPaths[i].isUnwrapped()) {
            String authority = path[i].toUri().getAuthority();
            for (FileStatus status : statuses) {
                status.setPath(setAuthority(wrapPath(status.getPath()), authority));
            }
        }
        java.util.Collections.addAll(collected, statuses);
    }
    return collected.toArray(new FileStatus[collected.size()]);
}

From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java

License:Apache License

/**
 * Computes the copy entities needed to replicate this dataset from the copy route's
 * source end point ({@code copyRoute.getCopyFrom()}) to its target end point
 * ({@code copyRoute.getCopyTo()}).
 *
 * <p>An empty list is returned when either end point is not a {@link HadoopFsEndPoint},
 * or when the watermark comparison below decides that no copy is required.
 *
 * <p>Otherwise the result contains one {@link CopyableFile} per source file that must be
 * copied, optionally a {@link PrePublishStep} that deletes target files, and optionally a
 * {@link PostPublishStep} that generates the watermark metadata on the target.
 *
 * @param targetFs          not referenced by this implementation; the target file system is
 *                          obtained from the target end point's URI instead
 * @param copyConfiguration passed through to every {@link CopyableFile} that is built
 * @return the copy entities for this dataset (possibly empty)
 * @throws IOException if obtaining a file system or building a copyable file fails
 */
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
        CopyConfiguration copyConfiguration) throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }

    // Nothing to do when the target has a watermark and the source either has none
    // or has one that compares less than or equal to the target's.
    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    // The same path filter (instantiated from this dataset's props) is applied to both sides.
    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    // Accepted target files, keyed by their path with scheme and authority stripped.
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();

    boolean watermarkMetadataCopied = false;

    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        // NOTE(review): copyToFileMap keys have scheme/authority stripped, while newPath is built
        // directly from copyTo.getDatasetPath(); the lookups below presumably rely on that dataset
        // path carrying no scheme/authority - confirm.
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        // Remember whether the source already ships the watermark file, so it is not regenerated below.
        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip copy same file: same length on both sides and a strictly newer modification time on the target
        if (copyToFileMap.containsKey(newPath)
                && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // need to remove those files in the target File System
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }

            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());

        }

        // clean up already checked paths; whatever remains in the map afterwards
        // was not matched by any accepted source file
        copyToFileMap.remove(newPath);
    }

    // delete the paths on target directory if NOT exists on source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths,
                this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file, but only when the source did not already provide one
    // as a regular file and the source end point actually has a watermark
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles
                .add(new PostPublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(),
                        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(),
                                copyTo.getDatasetPath(), copyFrom.getWatermark().get()),
                        1));
    }

    return copyableFiles;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Recursively collects the files found at or below {@code fileStatus} into {@code files}.
 *
 * <p>A non-directory status is added only if {@code fileFilter} accepts its path. A
 * directory is listed and every child is visited recursively; the listing itself is
 * filtered with {@code fileFilter} only when {@code applyFilterToDirectories} is set,
 * otherwise {@code NO_OP_PATH_FILTER} is used for the listing.
 *
 * <p>Every child is handed to the recursive call regardless of its type. The previous
 * code re-tested the already-known-to-be-a-directory parent inside the loop, which made
 * its "add directly" branch unreachable; that dead branch has been removed. Routing
 * files through the recursion is what applies {@code fileFilter} to them.
 *
 * @param fs                       file system used to list directories
 * @param files                    accumulator that receives accepted files
 * @param fileStatus               the file or directory to process
 * @param fileFilter               filter applied to every file path
 * @param applyFilterToDirectories whether {@code fileFilter} also restricts directory listings
 * @return {@code files}, for call chaining
 * @throws FileNotFoundException propagated from {@code fs.listStatus}
 * @throws IOException           propagated from {@code fs.listStatus}
 */
private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories)
        throws FileNotFoundException, IOException {
    if (fileStatus.isDirectory()) {
        PathFilter listingFilter = applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER;
        for (FileStatus status : fs.listStatus(fileStatus.getPath(), listingFilter)) {
            listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories);
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Collects the "leaf" entries at or below {@code fileStatus} into {@code files}.
 *
 * <p>A non-directory is a leaf if {@code fileFilter} accepts its path. A directory whose
 * listing is empty (as judged by {@code ArrayUtils.isEmpty}) is itself added, without
 * consulting the filter. Any other directory is descended into.
 *
 * @param fs         file system used to list directories
 * @param files      accumulator that receives the leaves
 * @param fileStatus the file or directory to process
 * @param fileFilter filter applied to non-directory paths
 * @return {@code files}, for call chaining
 * @throws IOException propagated from {@code fs.listStatus}
 */
private static List<FileStatus> listMostNestedPathRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) throws IOException {
    // Plain file: keep it only when the filter agrees.
    if (!fileStatus.isDirectory()) {
        if (fileFilter.accept(fileStatus.getPath())) {
            files.add(fileStatus);
        }
        return files;
    }

    FileStatus[] children = fs.listStatus(fileStatus.getPath());

    // Directory with nothing inside: the directory itself is the most nested path.
    if (ArrayUtils.isEmpty(children)) {
        files.add(fileStatus);
        return files;
    }

    for (FileStatus child : children) {
        listMostNestedPathRecursivelyHelper(fs, files, child, fileFilter);
    }
    return files;
}

From source file:gobblin.util.FileListUtils.java

License:Apache License

/**
 * Recursively collects every path (file or directory) at or below {@code fileStatus}
 * whose path is accepted by {@code fileFilter}.
 *
 * <p>Traversal is best-effort: if a directory cannot be listed, the failure is logged
 * together with its cause and that subtree is skipped; the method does not throw.
 *
 * @param fs         file system used to list directories
 * @param files      accumulator that receives accepted statuses
 * @param fileStatus the file or directory to process
 * @param fileFilter filter applied to every visited path, directories included
 * @return {@code files}, for call chaining
 */
private static List<FileStatus> listPathsRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) {
    if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    if (fileStatus.isDirectory()) {
        try {
            for (FileStatus status : fs.listStatus(fileStatus.getPath())) {
                listPathsRecursivelyHelper(fs, files, status, fileFilter);
            }
        } catch (IOException ioe) {
            // Pass the exception to the logger so the cause and stack trace are not lost.
            LOG.error("Could not list contents of path " + fileStatus.getPath(), ioe);
        }
    }
    return files;
}

From source file:gobblin.util.filters.AndPathFilter.java

License:Apache License

/**
 * Accepts {@code path} only if every filter in {@code pathFilters} accepts it.
 * Evaluation stops at the first rejecting filter. With no filters, the path is accepted.
 *
 * @param path the path to test
 * @return {@code true} if no filter rejects the path
 */
@Override
public boolean accept(Path path) {
    boolean accepted = true;
    for (PathFilter candidate : this.pathFilters) {
        if (!candidate.accept(path)) {
            accepted = false;
            break;
        }
    }
    return accepted;
}

From source file:io.confluent.connect.hdfs.FileUtils.java

License:Apache License

/**
 * Walks the tree rooted at {@code path} in {@code storage} and returns every
 * non-directory entry whose path is accepted by {@code filter}.
 *
 * @param storage the storage to query
 * @param path    root of the traversal
 * @param filter  filter applied to non-directory entries only
 * @return accepted entries; an empty list when {@code path} does not exist
 * @throws IOException propagated from the storage calls
 */
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
        throws IOException {
    ArrayList<FileStatus> matches = new ArrayList<>();
    String location = path.toString();
    if (!storage.exists(location)) {
        return matches;
    }

    for (FileStatus entry : storage.listStatus(location)) {
        if (entry.isDirectory()) {
            // Descend; directories themselves are never filtered or returned.
            matches.addAll(traverseImpl(storage, entry.getPath(), filter));
        } else if (filter.accept(entry.getPath())) {
            matches.add(entry);
        }
    }
    return matches;
}

From source file:io.confluent.connect.hdfs.utils.MemoryStorage.java

License:Apache License

/**
 * Returns a status for every stored key that starts with {@code path} and whose
 * path is accepted by {@code filter}.
 *
 * <p>If the {@code failure} field is set to {@code Failure.listStatusFailure}, it is
 * reset to {@code Failure.noFailure} and an {@link IOException} is thrown instead.
 *
 * @param path   key prefix to match (plain string prefix comparison)
 * @param filter filter applied to each matching key's path
 * @return statuses for the matching keys; each reports the size of the stored value as its length
 * @throws IOException when the listStatus failure is armed
 */
@Override
public FileStatus[] listStatus(String path, PathFilter filter) throws IOException {
    if (failure == Failure.listStatusFailure) {
        // The failure fires once: disarm before throwing.
        failure = Failure.noFailure;
        throw new IOException("listStatus failed.");
    }

    List<FileStatus> matches = new ArrayList<>();
    for (String key : data.keySet()) {
        if (!key.startsWith(path)) {
            continue;
        }
        Path keyPath = new Path(key);
        if (!filter.accept(keyPath)) {
            continue;
        }
        matches.add(new FileStatus(data.get(key).size(), false, 1, 0, 0, 0, null, null, null, keyPath));
    }
    return matches.toArray(new FileStatus[matches.size()]);
}

From source file:kogiri.mapreduce.common.kmermatch.KmerMatchInputFormat.java

License:Open Source License

/**
 * Resolves the job's input paths to file statuses.
 *
 * <p>Each configured input path is tested against a {@code MultiPathFilter} built from
 * the job's input path filter (when one is set) followed by a
 * {@code KmerIndexIndexPathFilter}. Accepted paths are resolved with
 * {@code getFileStatus}; the paths are not expanded into their children here.
 *
 * @param job the job context supplying input paths, credentials and configuration
 * @return the statuses of the accepted input paths, in input order
 * @throws IOException if no input paths are configured, or a file system call fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] inputPaths = getInputPaths(job);
    if (inputPaths.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Obtain tokens for every file system the inputs live on.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), inputPaths, job.getConfiguration());

    // User-provided filter first (if any), then the kmer index filter.
    List<PathFilter> filterChain = new ArrayList<PathFilter>();
    PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    filterChain.add(new KmerIndexIndexPathFilter());
    PathFilter combinedFilter = new MultiPathFilter(filterChain);

    List<FileStatus> accepted = new ArrayList<FileStatus>();
    for (Path inputPath : inputPaths) {
        if (!combinedFilter.accept(inputPath)) {
            continue;
        }
        FileSystem inputFs = inputPath.getFileSystem(job.getConfiguration());
        accepted.add(inputFs.getFileStatus(inputPath));
    }

    LOG.info("Total input paths to process : " + accepted.size());
    return accepted;
}

From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexInputFormat.java

License:Open Source License

/**
 * Resolves the job's input paths to file statuses.
 *
 * <p>Each configured input path is tested against a {@code MultiPathFilter} built from
 * the job's input path filter (when one is set) followed by a
 * {@code KmerIndexPartPathFilter}. Accepted paths are resolved with
 * {@code getFileStatus}; the paths are not expanded into their children here.
 *
 * @param job the job context supplying input paths, credentials and configuration
 * @return the statuses of the accepted input paths, in input order
 * @throws IOException if no input paths are configured, or a file system call fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] inputPaths = getInputPaths(job);
    if (inputPaths.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Obtain tokens for every file system the inputs live on.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), inputPaths, job.getConfiguration());

    // User-provided filter first (if any), then the kmer index part filter.
    List<PathFilter> filterChain = new ArrayList<PathFilter>();
    PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    filterChain.add(new KmerIndexPartPathFilter());
    PathFilter combinedFilter = new MultiPathFilter(filterChain);

    List<FileStatus> accepted = new ArrayList<FileStatus>();
    for (Path inputPath : inputPaths) {
        if (!combinedFilter.accept(inputPath)) {
            continue;
        }
        FileSystem inputFs = inputPath.getFileSystem(job.getConfiguration());
        accepted.add(inputFs.getFileStatus(inputPath));
    }

    LOG.info("Total input paths to process : " + accepted.size());
    return accepted;
}