List of usage examples for org.apache.hadoop.fs.PathFilter#accept
boolean accept(Path path);
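Before the examples from real projects below, here is a minimal, self-contained sketch of what accept(Path) does: a PathFilter decides for each path whether it should be included, and APIs such as FileSystem#listStatus(Path, PathFilter) call accept once per candidate entry. The class name, the directory argument, and the choice of hiding names that start with "." or "_" are illustrative assumptions, not taken from any of the source files listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class HiddenFileFilterExample {

    // Accepts every path whose final component does not start with "." or "_".
    private static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith(".") && !name.startsWith("_");
        }
    };

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus calls accept(Path) once for each entry directly under the directory.
        for (FileStatus status : fs.listStatus(new Path(args[0]), HIDDEN_FILE_FILTER)) {
            System.out.println(status.getPath());
        }
    }
}

Because this filter keeps no state, the same instance can be reused across many listings.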
From source file:de.zib.sfs.StatisticsFileSystem.java
License:BSD License
@Override
public FileStatus[] listStatus(Path[] path, PathFilter filter) throws FileNotFoundException, IOException {
    UnwrappedPath[] unwrappedPaths = new UnwrappedPath[path.length];
    for (int i = 0; i < path.length; ++i) {
        unwrappedPaths[i] = unwrapPath(path[i]);
    }
    PathFilter wrappedFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            return filter.accept(unwrapPath(p));
        }
    };
    FileStatus[] fileStatuses = this.wrappedFS.listStatus(unwrappedPaths, wrappedFilter);
    for (int i = 0; i < fileStatuses.length; ++i) {
        if (unwrappedPaths[i].isUnwrapped()) {
            fileStatuses[i].setPath(
                    setAuthority(wrapPath(fileStatuses[i].getPath()), path[i].toUri().getAuthority()));
        }
    }
    return fileStatuses;
}
From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java
License:Apache License
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs, CopyConfiguration copyConfiguration)
        throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();

    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }

    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();
    boolean watermarkMetadataCopied = false;
    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip copying a file that is unchanged on the target
        if (copyToFileMap.containsKey(newPath)
                && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // the stale copy on the target file system needs to be removed first
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }

            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());
        }

        // clean up already checked paths
        copyToFileMap.remove(newPath);
    }

    // delete paths in the target directory that do NOT exist on the source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths, this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles.add(new PostPublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(),
                new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(), copyTo.getDatasetPath(),
                        copyFrom.getWatermark().get()),
                1));
    }

    return copyableFiles;
}
From source file:gobblin.util.FileListUtils.java
License:Apache License
private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories)
        throws FileNotFoundException, IOException {
    if (fileStatus.isDirectory()) {
        for (FileStatus status : fs.listStatus(fileStatus.getPath(),
                applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER)) {
            if (status.isDirectory()) {
                listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories);
            } else {
                files.add(status);
            }
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}
From source file:gobblin.util.FileListUtils.java
License:Apache License
private static List<FileStatus> listMostNestedPathRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) throws IOException {
    if (fileStatus.isDirectory()) {
        FileStatus[] curFileStatus = fs.listStatus(fileStatus.getPath());
        if (ArrayUtils.isEmpty(curFileStatus)) {
            files.add(fileStatus);
        } else {
            for (FileStatus status : curFileStatus) {
                listMostNestedPathRecursivelyHelper(fs, files, status, fileFilter);
            }
        }
    } else if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    return files;
}
From source file:gobblin.util.FileListUtils.java
License:Apache License
private static List<FileStatus> listPathsRecursivelyHelper(FileSystem fs, List<FileStatus> files,
        FileStatus fileStatus, PathFilter fileFilter) {
    if (fileFilter.accept(fileStatus.getPath())) {
        files.add(fileStatus);
    }
    if (fileStatus.isDirectory()) {
        try {
            for (FileStatus status : fs.listStatus(fileStatus.getPath())) {
                listPathsRecursivelyHelper(fs, files, status, fileFilter);
            }
        } catch (IOException ioe) {
            LOG.error("Could not list contents of path " + fileStatus.getPath());
        }
    }
    return files;
}
From source file:gobblin.util.filters.AndPathFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    for (PathFilter filter : this.pathFilters) {
        if (!filter.accept(path)) {
            return false;
        }
    }
    return true;
}
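The accept implementation above returns true only when every wrapped filter accepts the path. As a standalone illustration of the same AND-composition idea (written so that it does not assume anything about AndPathFilter's constructor, which is not shown here), the helper below builds one combined PathFilter out of several; the class and method names are assumptions for this sketch only.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PathFilters {

    private PathFilters() {
    }

    // Returns a filter that accepts a path only if every given filter accepts it,
    // mirroring the loop in the accept method above.
    public static PathFilter and(final PathFilter... filters) {
        return new PathFilter() {
            @Override
            public boolean accept(Path path) {
                for (PathFilter filter : filters) {
                    if (!filter.accept(path)) {
                        return false;
                    }
                }
                return true;
            }
        };
    }
}

A filter composed this way can then be handed to FileSystem#listStatus(Path, PathFilter), so a single accept call enforces all of the conditions at once.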
From source file:io.confluent.connect.hdfs.FileUtils.java
License:Apache License
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
        throws IOException {
    if (!storage.exists(path.toString())) {
        return new ArrayList<>();
    }
    ArrayList<FileStatus> result = new ArrayList<>();
    FileStatus[] statuses = storage.listStatus(path.toString());
    for (FileStatus status : statuses) {
        if (status.isDirectory()) {
            result.addAll(traverseImpl(storage, status.getPath(), filter));
        } else {
            if (filter.accept(status.getPath())) {
                result.add(status);
            }
        }
    }
    return result;
}
From source file:io.confluent.connect.hdfs.utils.MemoryStorage.java
License:Apache License
@Override
public FileStatus[] listStatus(String path, PathFilter filter) throws IOException {
    if (failure == Failure.listStatusFailure) {
        failure = Failure.noFailure;
        throw new IOException("listStatus failed.");
    }
    List<FileStatus> result = new ArrayList<>();
    for (String key : data.keySet()) {
        if (key.startsWith(path) && filter.accept(new Path(key))) {
            FileStatus status = new FileStatus(data.get(key).size(), false, 1, 0, 0, 0, null, null, null,
                    new Path(key));
            result.add(status);
        }
    }
    return result.toArray(new FileStatus[result.size()]);
}
From source file:kogiri.mapreduce.common.kmermatch.KmerMatchInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // combine the user-provided path filter (if any) with the KmerIndexIndexPathFilter
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexIndexPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }

    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // combine the user-provided path filter (if any) with the KmerIndexPartPathFilter
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexPartPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }

    LOG.info("Total input paths to process : " + result.size());
    return result;
}