Usage examples for org.apache.hadoop.fs.PathFilter#accept
boolean accept(Path path);
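PathFilter has a single abstract method, so implementing one is short. Before the real-world usages below, here is a minimal illustrative sketch (the filter, which hides "_"- and "."-prefixed outputs such as _SUCCESS and .crc files, is an assumption for demonstration and not taken from any of the sources that follow):

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Illustrative filter: skip "hidden" outputs such as _SUCCESS and .crc files.
PathFilter hiddenFileFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        return !name.startsWith("_") && !name.startsWith(".");
    }
};

// Typical use: pass the filter to a listing call.
// FileStatus[] visible = fs.listStatus(new Path("/data"), hiddenFileFilter);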
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * Prefix based: return everything that starts with the prefix.
 * Full listing: return all objects, even zero size.
 * If fileStatus is null, the path is part of some name, neither an object
 * nor a pseudo directory. Was called by Globber.
 *
 * @param hostName hostName
 * @param path path
 * @param fullListing return all objects, even zero size
 * @param prefixBased return everything that starts with the prefix
 * @return list
 * @throws IOException if error
 */
/*
public FileStatus[] list(String hostName, Path path, boolean fullListing,
        boolean prefixBased) throws IOException {
    String key = pathToKey(hostName, path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(mBucket).withPrefix(key);
    String curObj;
    if (path.toString().equals(mBucket)) {
        curObj = "";
    } else if (path.toString().startsWith(mBucket + "/")) {
        curObj = path.toString().substring(mBucket.length() + 1);
    } else if (path.toString().startsWith(hostName)) {
        curObj = path.toString().substring(hostName.length());
    } else {
        curObj = path.toString();
    }
    ObjectListing objectList = mClient.listObjects(request);
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    if (objectSummaries.size() == 0) {
        FileStatus[] emptyRes = {};
        LOG.debug("List for bucket {} is empty", mBucket);
        return emptyRes;
    }
    boolean objectScanContinue = true;
    S3ObjectSummary prevObj = null;
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            if (prevObj == null) {
                prevObj = obj;
                continue;
            }
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            if (!prefixBased && !curObj.equals("") && !path.toString().endsWith("/")
                    && !unifiedObjectName.equals(curObj) && !unifiedObjectName.startsWith(curObj + "/")) {
                LOG.trace("{} does not match {}. Skipped", unifiedObjectName, curObj);
                continue;
            }
            if (isSparkOrigin(unifiedObjectName) && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                if (!isJobSuccessful(unifiedObjectName)) {
                    LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName);
                    if (fModeAutomaticDelete) {
                        delete(hostName, new Path(objKey), true);
                    }
                    continue;
                } else {
                    // If we are here, the data was created by Spark and the job completed
                    // successfully. However, there might be parts of failed tasks that
                    // were not aborted, so we need to make sure there are no failed attempts.
                    if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                        // Found a failed attempt that was not aborted.
                        LOG.trace("Collision found between {} and {}", prevObj.getKey(), objKey);
                        if (prevObj.getSize() < obj.getSize()) {
                            LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                            prevObj = obj;
                        }
                        continue;
                    }
                }
            }
            if (prevObj.getSize() > 0 || fullListing) {
                FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
                tmpResult.add(fs);
            }
            prevObj = obj;
        }
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
        } else {
            objectScanContinue = false;
        }
    }
    if (prevObj != null && (prevObj.getSize() > 0 || fullListing)) {
        FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
        tmpResult.add(fs);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("COS List to return length {}", tmpResult.size());
        for (FileStatus fs : tmpResult) {
            LOG.trace("{}", fs.getPath());
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
*/
@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased,
        Boolean isDirectory, boolean flatListing, PathFilter filter)
        throws FileNotFoundException, IOException {
    LOG.debug("Native direct list status for {}", path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String key = pathToKey(hostName, path);
    if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/")) {
        key = key + "/";
        LOG.debug("listNativeDirect modify key to {}", key);
    }
    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000);
    request.setPrefix(key);
    if (!flatListing) {
        request.setDelimiter("/");
    }
    ObjectListing objectList = mClient.listObjects(request);
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    List<String> commonPrefixes = objectList.getCommonPrefixes();
    boolean objectScanContinue = true;
    S3ObjectSummary prevObj = null;
    // start FTA logic
    boolean stocatorOrigin = isSparkOrigin(key, path.toString());
    if (stocatorOrigin) {
        LOG.debug("Stocator origin is true for {}", key);
        if (!isJobSuccessful(key)) {
            LOG.debug("{} created by failed Spark job. Skipped", key);
            if (fModeAutomaticDelete) {
                delete(hostName, new Path(key), true);
            }
            return new FileStatus[0];
        }
    }
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            if (prevObj == null) {
                prevObj = obj;
                continue;
            }
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            LOG.debug("list candidate {}, unified name {}", objKey, unifiedObjectName);
            if (stocatorOrigin && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                // If we are here, the data was created by Spark and the job completed
                // successfully. However, there might be parts of failed tasks that
                // were not aborted, so we need to make sure there are no failed attempts.
                if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                    // Found a failed attempt that was not aborted.
                    LOG.trace("Collision found between {} and {}", prevObj.getKey(), objKey);
                    if (prevObj.getSize() < obj.getSize()) {
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        prevObj = obj;
                    }
                    continue;
                }
            }
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
                if (filter == null) {
                    tmpResult.add(fs);
                } else if (filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
        } else {
            objectScanContinue = false;
        }
    }
    if (prevObj != null) {
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.debug("Adding the last object from the list {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
            if (filter == null) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else if (filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }
    // get common prefixes
    for (String comPrefix : commonPrefixes) {
        LOG.debug("Common prefix is {}", comPrefix);
        if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString())
                || emptyObjects.isEmpty()) {
            FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix));
            LOG.debug("Match between common prefix and empty object {}. Adding to result", comPrefix);
            if (filter == null) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else if (filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}",
                        status.getPath(), filter);
            }
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Add files in the input path recursively into the results.
 *
 * @param result the List to store all files
 * @param fs the FileSystem
 * @param path the input path
 * @param inputFilter the input filter that can be used to filter files/dirs
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
    while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }
}
From source file:com.mongodb.hadoop.BSONFileInputFormat.java
License:Apache License
@Override
public List<FileSplit> getSplits(final JobContext context) throws IOException {
    Configuration config = context.getConfiguration();
    PathFilter pf = getInputPathFilter(context);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>();
    List<FileStatus> inputFiles = listStatus(context);
    for (FileStatus file : inputFiles) {
        if (pf != null && !pf.accept(file.getPath())) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("skipping file %s not matched path filter.", file.getPath()));
            }
            continue;
        } else if (LOG.isDebugEnabled()) {
            LOG.debug("processing file " + file.getPath());
        }
        BSONSplitter splitter = new BSONSplitter();
        splitter.setConf(config);
        splitter.setInputPath(file.getPath());
        Path splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits");
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        splits.addAll(splitter.getAllSplits());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Total of %d splits found.", splits.size()));
    }
    return splits;
}
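The filter returned by getInputPathFilter(context) above is whatever was registered on the job. A minimal sketch of wiring one up via the standard mapreduce API (the filter class, job name, and helper method are illustrative assumptions, not part of the mongo-hadoop source):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// Must be public with a no-arg constructor so the framework can
// instantiate it reflectively.
public class BsonOnlyFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return path.getName().endsWith(".bson");
    }
}

// Job setup sketch: after this, getInputPathFilter(context) in
// getSplits() returns an instance of BsonOnlyFilter.
public static Job newBsonJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "bson-read");
    FileInputFormat.setInputPathFilter(job, BsonOnlyFilter.class);
    return job;
}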
From source file:com.netflix.aegisthus.tools.ChainedPathFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    for (PathFilter pf : pfs) {
        if (!pf.accept(path)) {
            return false;
        }
    }
    return true;
}
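ChainedPathFilter short-circuits on the first rejection, so it behaves as a logical AND over its delegates. Since its constructor is not shown above, here is a self-contained sketch of the same idea (class and filter names are illustrative; the lambda usage assumes Java 8+, which works because PathFilter has a single abstract method):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// AND-composition of PathFilters: a path passes only if every delegate accepts it.
public class AllOfPathFilter implements PathFilter {
    private final List<PathFilter> delegates;

    public AllOfPathFilter(PathFilter... delegates) {
        this.delegates = Arrays.asList(delegates);
    }

    @Override
    public boolean accept(Path path) {
        for (PathFilter pf : delegates) {
            if (!pf.accept(path)) {
                return false; // short-circuit on first rejection
            }
        }
        return true;
    }
}

// Usage sketch: hide dot-files AND keep only .parquet data files.
// PathFilter visible = p -> !p.getName().startsWith(".");
// PathFilter parquet = p -> p.getName().endsWith(".parquet");
// boolean ok = new AllOfPathFilter(visible, parquet).accept(new Path("/data/part-0.parquet"));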
From source file:com.netflix.bdp.s3.S3PartitionedOutputCommitter.java
License:Apache License
@Override
protected List<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    PathFilter filter = HiddenPathFilter.get();

    // Get files on the local FS in the attempt path.
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
    RemoteIterator<LocatedFileStatus> iter = attemptFS.listFiles(attemptPath, true /* recursive */);
    List<FileStatus> stats = Lists.newArrayList();
    while (iter.hasNext()) {
        FileStatus stat = iter.next();
        if (filter.accept(stat.getPath())) {
            stats.add(stat);
        }
    }
    return stats;
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * @param result contains the list of FileStatus that passed the filtering conditions
 * @param fs the FileSystem
 * @param path the input path
 * @param dirFilter filter applied to directories only
 * @param fileFilter filter applied to files only
 * @throws IOException
 */
public static void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter dirFilter, PathFilter fileFilter) throws IOException {
    FileStatus[] stats = fs.listStatus(path);
    if (stats != null) {
        for (FileStatus stat : stats) {
            if (stat.isDir() && dirFilter.accept(stat.getPath())) {
                addInputPathRecursively(result, fs, stat.getPath(), dirFilter, fileFilter);
            } else if (fileFilter.accept(stat.getPath())) {
                result.add(stat);
            }
        }
    }
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder
 * and all its subdirectories are skipped.
 *
 * @param fs File System
 * @param basePathStr Base-Path
 * @param consumer Callback for processing
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
    FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
    for (int i = 0; i < topLevelStatuses.length; i++) {
        FileStatus child = topLevelStatuses[i];
        if (child.isFile()) {
            boolean success = consumer.apply(child);
            if (!success) {
                throw new HoodieException("Failed to process file-status=" + child);
            }
        } else if (pathFilter.accept(child.getPath())) {
            RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true);
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                boolean success = consumer.apply(status);
                if (!success) {
                    throw new HoodieException("Failed to process file-status=" + status);
                }
            }
        }
    }
}
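The bodies of getExcludeMetaPathFilter() and ALLOW_ALL_FILTER are not shown in this excerpt. A plausible sketch of what they could look like, assuming the Hudi meta-folder is named ".hoodie" and that the filter only needs to reject top-level children with that name (both implementations below are guesses for illustration, and the lambdas assume Java 8+):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical stand-in for getExcludeMetaPathFilter(): rejects the ".hoodie"
// meta-folder so the recursive listing above never descends into it.
static PathFilter excludeMetaPathFilter() {
    return path -> !path.getName().equals(".hoodie");
}

// A trivial allow-all filter in the spirit of ALLOW_ALL_FILTER.
static final PathFilter ALLOW_ALL = path -> true;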
From source file:de.zib.sfs.StatisticsFileSystem.java
License:BSD License
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
    PathFilter wrappedFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return filter.accept(unwrapPath(path));
        }
    };

    UnwrappedPath unwrappedPathPattern = unwrapPath(pathPattern);
    FileStatus[] fileStatuses = this.wrappedFS.globStatus(unwrappedPathPattern, wrappedFilter);
    if (fileStatuses == null) {
        return null;
    }
    if (unwrappedPathPattern.isUnwrapped()) {
        for (FileStatus fileStatus : fileStatuses) {
            fileStatus.setPath(setAuthority(wrapPath(fileStatus.getPath()),
                    pathPattern.toUri().getAuthority()));
        }
    }
    return fileStatuses;
}
From source file:de.zib.sfs.StatisticsFileSystem.java
License:BSD License
@Override
public FileStatus[] listStatus(Path f, PathFilter filter) throws FileNotFoundException, IOException {
    long startTime = System.nanoTime();
    UnwrappedPath unwrappedPath = unwrapPath(f);
    PathFilter wrappedFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return filter.accept(unwrapPath(path));
        }
    };

    FileStatus[] fileStatuses = this.wrappedFS.listStatus(unwrappedPath, wrappedFilter);
    if (unwrappedPath.isUnwrapped()) {
        for (FileStatus fileStatus : fileStatuses) {
            fileStatus.setPath(setAuthority(wrapPath(fileStatus.getPath()), f.toUri().getAuthority()));
        }
    }
    if (!this.skipOther) {
        int fd = LiveOperationStatisticsAggregator.instance.registerFileDescriptor(f.toString());
        LiveOperationStatisticsAggregator.instance.aggregateOperationStatistics(OperationSource.SFS,
                OperationCategory.OTHER, startTime, System.nanoTime(), fd);
    }
    return fileStatuses;
}