Example usage for org.apache.hadoop.fs PathFilter accept

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter#accept.

Prototype

boolean accept(Path path);

Document

Tests whether or not the specified abstract pathname should be included in a pathname list.
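
To illustrate the contract before the longer examples below, here is a minimal sketch of a custom filter and of how accept is invoked during a directory listing. The TxtPathFilter name and the /data path are illustrative assumptions, not taken from any of the projects quoted below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical filter: keeps only paths whose final component ends in ".txt".
public class TxtPathFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        // accept is called once per candidate path; returning false drops it from the listing
        return path.getName().endsWith(".txt");
    }

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus applies the filter to every child of the directory
        for (FileStatus status : fs.listStatus(new Path("/data"), new TxtPathFilter())) {
            System.out.println(status.getPath());
        }
    }
}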

Usage

From source file: ml.shifu.shifu.guagua.ShifuInputFormat.java

License: Apache License

@SuppressWarnings("deprecation")
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to recursive look into the directory structure
    boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive",
            false);

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDir()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}

From source file: ml.shifu.shifu.guagua.ShifuInputFormat.java

License: Apache License

@SuppressWarnings("deprecation")
private void addInputPathRecursive(List<FileStatus> result, FileSystem fs, Path path, PathFilter inputFilter)
        throws IOException {
    FileStatus[] fss = fs.listStatus(path);
    for (FileStatus fileStatus : fss) {
        if (inputFilter.accept(fileStatus.getPath())) {
            if (fileStatus.isDir()) {
                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
            } else {
                result.add(fileStatus);
            }
        }
    }
}

From source file: org.apache.carbondata.hadoop.CarbonInputFormat.java

License: Apache License

private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());

    //get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}

From source file: org.apache.gobblin.util.FileListUtils.java

License: Apache License

private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories,
        boolean includeEmptyDirectories) throws FileNotFoundException, IOException {
    if (fileStatus.isDirectory()) {
        for (FileStatus status : fs.listStatus(fileStatus.getPath(),
                applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER)) {
            if (status.isDirectory()) {
                // Number of files collected before diving into the directory
                int numFilesBefore = files.size();

                listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories,
                        includeEmptyDirectories);

                // Number of files collected after diving into the directory
                int numFilesAfter = files.size();
                if (numFilesAfter == numFilesBefore && includeEmptyDirectories) {
                    /*
                     * This is effectively an empty directory, which needs explicit copying. Had there been any
                     * data file in the directory, the directory would have been created as a side effect of
                     * copying the data file.
                     */
                    files.add(status);
                }
            } else {
                listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories,
                        includeEmptyDirectories);
            }
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}

From source file: org.apache.ivory.service.SharedLibraryHostingService.java

License: Apache License

public static void pushLibsToHDFS(String path, Cluster cluster, PathFilter pathFilter) throws IOException {
    Configuration conf = ClusterHelper.getConfiguration(cluster);
    FileSystem fs = FileSystem.get(conf);
    String localPaths = StartupProperties.get().getProperty("system.lib.location");
    assert localPaths != null && !localPaths.isEmpty() : "Invalid value for system.lib.location";
    if (!new File(localPaths).isDirectory()) {
        LOG.warn(localPaths + " configured for system.lib.location doesn't contain any valid libs");
        return;
    }
    for (File localFile : new File(localPaths).listFiles()) {
        Path clusterFile = new Path(path, localFile.getName());
        if (!pathFilter.accept(clusterFile))
            continue;

        if (fs.exists(clusterFile)) {
            FileStatus fstat = fs.getFileStatus(clusterFile);
            if (fstat.getLen() == localFile.length() && fstat.getModificationTime() == localFile.lastModified())
                continue;
        }
        fs.copyFromLocalFile(false, true, new Path(localFile.getAbsolutePath()), clusterFile);
        fs.setTimes(clusterFile, localFile.lastModified(), System.currentTimeMillis());
        LOG.info("Copied " + localFile.getAbsolutePath() + " to " + path + " in " + fs.getUri());
    }
}

From source file: org.apache.mahout.clustering.display.ClustersFilterTest.java

License: Apache License

@Test
public void testAcceptNotFinal() throws Exception {
    Path path0 = new Path(output, "clusters-0");
    Path path1 = new Path(output, "clusters-1");

    path0.getFileSystem(configuration).createNewFile(path0);
    path1.getFileSystem(configuration).createNewFile(path1);

    PathFilter clustersFilter = new ClustersFilter();

    assertTrue(clustersFilter.accept(path0));
    assertTrue(clustersFilter.accept(path1));
}

From source file: org.apache.mahout.clustering.display.ClustersFilterTest.java

License: Apache License

@Test
public void testAcceptFinalPath() throws IOException {
    Path path0 = new Path(output, "clusters-0");
    Path path1 = new Path(output, "clusters-1");
    Path path2 = new Path(output, "clusters-2");
    Path path3Final = new Path(output, "clusters-3-final");

    path0.getFileSystem(configuration).createNewFile(path0);
    path1.getFileSystem(configuration).createNewFile(path1);
    path2.getFileSystem(configuration).createNewFile(path2);
    path3Final.getFileSystem(configuration).createNewFile(path3Final);

    PathFilter clustersFilter = new ClustersFilter();

    assertTrue(clustersFilter.accept(path0));
    assertTrue(clustersFilter.accept(path1));
    assertTrue(clustersFilter.accept(path2));
    assertTrue(clustersFilter.accept(path3Final));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java

License: Apache License

@Test
public void acceptPathShouldProperlyIgnorePathsWhereTheLastComponentStartsWithADot() throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile(".*"), true);
    assertFalse(filter.accept(new Path("/.some_hidden_file")));
    assertFalse(filter.accept(new Path("/some/long/path/.some_hidden_file/")));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java

License: Apache License

@Test
public void acceptPathShouldProperlyAcceptPathsWhereTheNonLastComponentStartsWithADot() throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile(".*"), true);
    assertTrue(filter.accept(new Path("/some/long/path/.some_hidden_file/should/work")));
    assertTrue(filter.accept(new Path("/.some_hidden_file/should/still/accept")));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java

License: Apache License

@Test
public void acceptPathShouldProperlyMatchAllSubdirectoriesThatMatchWatchDirectoryAndFileFilter()
        throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile("/root(/.*)?"), true);
    assertTrue(filter.accept(new Path("/root/sometest.txt")));
}