List of usage examples for org.apache.hadoop.fs.PathFilter#accept
boolean accept(Path path);
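PathFilter is a single-method callback: accept returns true if the given path should be included in a listing. As a quick orientation before the project examples below, here is a minimal sketch of implementing the interface and passing it to FileSystem.listStatus. The HiddenFileFilterDemo class and the "/data/in" path are illustrative assumptions; the underscore/dot convention the filter encodes, however, is the one Hadoop's FileInputFormat applies by default.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class HiddenFileFilterDemo {

    // Minimal PathFilter: skip "hidden" entries whose name starts with
    // '_' or '.', the same convention FileInputFormat uses by default.
    // Illustrative sketch; not taken from the source files below.
    static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus applies the filter to every child of the directory.
        for (FileStatus status : fs.listStatus(new Path("/data/in"), HIDDEN_FILE_FILTER)) {
            System.out.println(status.getPath());
        }
    }
}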
From source file: ml.shifu.shifu.guagua.ShifuInputFormat.java
License: Apache License

@SuppressWarnings("deprecation")
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to look recursively into the directory structure
    boolean recursive = job.getConfiguration()
            .getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false);

    List<IOException> errors = new ArrayList<IOException>();

    // Create a MultiPathFilter combining the hiddenFileFilter and the
    // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDir()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}
From source file: ml.shifu.shifu.guagua.ShifuInputFormat.java
License: Apache License

@SuppressWarnings("deprecation")
private void addInputPathRecursive(List<FileStatus> result, FileSystem fs, Path path, PathFilter inputFilter)
        throws IOException {
    FileStatus[] fss = fs.listStatus(path);
    for (FileStatus fileStatus : fss) {
        if (inputFilter.accept(fileStatus.getPath())) {
            if (fileStatus.isDir()) {
                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
            } else {
                result.add(fileStatus);
            }
        }
    }
}
From source file: org.apache.carbondata.hadoop.CarbonInputFormat.java
License: Apache License

private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // Get tokens for all the required FileSystems for the table path.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
            job.getConfiguration());

    // Get all data files of valid partitions and segments.
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];
        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}
From source file: org.apache.gobblin.util.FileListUtils.java
License: Apache License

private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories,
        boolean includeEmptyDirectories) throws FileNotFoundException, IOException {
    if (fileStatus.isDirectory()) {
        for (FileStatus status : fs.listStatus(fileStatus.getPath(),
                applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER)) {
            if (status.isDirectory()) {
                // Number of files collected before diving into the directory
                int numFilesBefore = files.size();
                listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories,
                        includeEmptyDirectories);
                // Number of files collected after diving into the directory
                int numFilesAfter = files.size();
                if (numFilesAfter == numFilesBefore && includeEmptyDirectories) {
                    /*
                     * This is effectively an empty directory, which needs explicit copying. Had there been
                     * any data file in the directory, the directory would have been created as a side effect
                     * of copying the data file.
                     */
                    files.add(status);
                }
            } else {
                listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories,
                        includeEmptyDirectories);
            }
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}
From source file: org.apache.ivory.service.SharedLibraryHostingService.java
License: Apache License

public static void pushLibsToHDFS(String path, Cluster cluster, PathFilter pathFilter) throws IOException {
    Configuration conf = ClusterHelper.getConfiguration(cluster);
    FileSystem fs = FileSystem.get(conf);
    String localPaths = StartupProperties.get().getProperty("system.lib.location");
    assert localPaths != null && !localPaths.isEmpty() : "Invalid value for system.lib.location";
    if (!new File(localPaths).isDirectory()) {
        LOG.warn(localPaths + " configured for system.lib.location doesn't contain any valid libs");
        return;
    }

    for (File localFile : new File(localPaths).listFiles()) {
        Path clusterFile = new Path(path, localFile.getName());
        if (!pathFilter.accept(clusterFile)) {
            continue;
        }
        if (fs.exists(clusterFile)) {
            FileStatus fstat = fs.getFileStatus(clusterFile);
            if (fstat.getLen() == localFile.length()
                    && fstat.getModificationTime() == localFile.lastModified()) {
                continue;
            }
        }
        fs.copyFromLocalFile(false, true, new Path(localFile.getAbsolutePath()), clusterFile);
        fs.setTimes(clusterFile, localFile.lastModified(), System.currentTimeMillis());
        LOG.info("Copied " + localFile.getAbsolutePath() + " to " + path + " in " + fs.getUri());
    }
}
From source file: org.apache.mahout.clustering.display.ClustersFilterTest.java
License: Apache License

@Test
public void testAcceptNotFinal() throws Exception {
    Path path0 = new Path(output, "clusters-0");
    Path path1 = new Path(output, "clusters-1");

    path0.getFileSystem(configuration).createNewFile(path0);
    path1.getFileSystem(configuration).createNewFile(path1);

    PathFilter clustersFilter = new ClustersFilter();

    assertTrue(clustersFilter.accept(path0));
    assertTrue(clustersFilter.accept(path1));
}
From source file: org.apache.mahout.clustering.display.ClustersFilterTest.java
License: Apache License

@Test
public void testAcceptFinalPath() throws IOException {
    Path path0 = new Path(output, "clusters-0");
    Path path1 = new Path(output, "clusters-1");
    Path path2 = new Path(output, "clusters-2");
    Path path3Final = new Path(output, "clusters-3-final");

    path0.getFileSystem(configuration).createNewFile(path0);
    path1.getFileSystem(configuration).createNewFile(path1);
    path2.getFileSystem(configuration).createNewFile(path2);
    path3Final.getFileSystem(configuration).createNewFile(path3Final);

    PathFilter clustersFilter = new ClustersFilter();

    assertTrue(clustersFilter.accept(path0));
    assertTrue(clustersFilter.accept(path1));
    assertTrue(clustersFilter.accept(path2));
    assertTrue(clustersFilter.accept(path3Final));
}
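Both Mahout tests above exercise ClustersFilter without showing its body. A minimal filter consistent with their assertions (every "clusters-*" output, intermediate or final, is accepted) might look like the sketch below; this is an illustrative reconstruction, not Mahout's actual implementation.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical reconstruction: accepts any path whose last component
// starts with "clusters-", which satisfies both tests above.
public class ClustersPrefixFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return path.getName().startsWith("clusters-");
    }
}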
From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java
License: Apache License

@Test
public void acceptPathShouldProperlyIgnorePathsWhereTheLastComponentStartsWithADot() throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile(".*"), true);

    assertFalse(filter.accept(new Path("/.some_hidden_file")));
    assertFalse(filter.accept(new Path("/some/long/path/.some_hidden_file/")));
}
From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java
License: Apache License

@Test
public void acceptPathShouldProperlyAcceptPathsWhereTheNonLastComponentStartsWithADot() throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile(".*"), true);

    assertTrue(filter.accept(new Path("/some/long/path/.some_hidden_file/should/work")));
    assertTrue(filter.accept(new Path("/.some_hidden_file/should/still/accept")));
}
From source file: org.apache.nifi.processors.hadoop.inotify.TestNotificationEventPathFilter.java
License: Apache License

@Test
public void acceptPathShouldProperlyMatchAllSubdirectoriesThatMatchWatchDirectoryAndFileFilter()
        throws Exception {
    PathFilter filter = new NotificationEventPathFilter(Pattern.compile("/root(/.*)?"), true);

    assertTrue(filter.accept(new Path("/root/sometest.txt")));
}
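All of the examples above call accept(...) directly, but in a typical MapReduce job you instead register a PathFilter class with FileInputFormat, which applies it while listing input files (as the Shifu example at the top does via getInputPathFilter). A minimal sketch follows; the CsvJobSetup and CsvOnlyFilter names and the "/data/in" path are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class CsvJobSetup {

    // Illustrative filter: admit *.csv files, plus any name without an
    // extension so that directories still pass during recursive listing
    // (FileInputFormat runs directories through the filter as well).
    public static class CsvOnlyFilter implements PathFilter {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.contains(".") || name.endsWith(".csv");
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "csv-only");
        FileInputFormat.addInputPath(job, new Path("/data/in"));
        // The filter class is instantiated by reflection, so it must be
        // public and static, with a no-argument constructor.
        FileInputFormat.setInputPathFilter(job, CsvOnlyFilter.class);
    }
}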