Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath()

Source Link

Usage

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Loads the file block metadata (e.g. DFS blocks) of the given file from the
 * filesystem. Every block is added to the file descriptor 'fd', and the descriptor's
 * blocks are recorded together with their BlockLocations in 'perFsFileBlocks' under the
 * key of the filesystem, so that the storage IDs for each block can be retrieved from
 * BlockLocation.
 *
 * If FileSystemUtil.hasGetFileBlockLocations(fs) is false, the work is delegated to
 * synthesizeBlockMetadata() and 'perFsFileBlocks' is left untouched.
 *
 * Must be threadsafe. Access to 'perFsFileBlocks' and 'hostIndex' must be protected.
 *
 * @throws RuntimeException wrapping any IOException raised while determining the
 *         block locations
 */
private static void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
        HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName,
        ListMap<TNetworkAddress> hostIndex) {
    Preconditions.checkNotNull(fd);
    Preconditions.checkNotNull(perFsFileBlocks);
    Preconditions.checkArgument(!file.isDirectory());
    LOG.debug("load block md for " + tblName + " file " + fd.getFileName());

    boolean canQueryBlockLocations = FileSystemUtil.hasGetFileBlockLocations(fs);
    if (!canQueryBlockLocations) {
        synthesizeBlockMetadata(file, fd, fileFormat, hostIndex);
        return;
    }

    try {
        // A LocatedFileStatus already carries its block locations; anything else needs
        // an explicit filesystem query covering the whole file.
        BlockLocation[] blockLocations = (file instanceof LocatedFileStatus)
                ? ((LocatedFileStatus) file).getBlockLocations()
                : fs.getFileBlockLocations(file, 0, file.getLen());
        Preconditions.checkNotNull(blockLocations);

        // Turn every block location into a file block of the descriptor.
        for (int i = 0; i < blockLocations.length; i++) {
            BlockLocation blockLocation = blockLocations[i];
            Preconditions.checkNotNull(blockLocation);
            fd.addFileBlock(createFileBlock(blockLocation, hostIndex));
        }

        // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the
        // blocks are collected, the disk IDs will be queried in one batch per filesystem.
        FsKey fsKey = new FsKey(fs);
        synchronized (perFsFileBlocks) {
            FileBlocksInfo blocksForFs = perFsFileBlocks.get(fsKey);
            if (blocksForFs == null) {
                // First file seen for this filesystem: register a fresh entry.
                blocksForFs = new FileBlocksInfo();
                perFsFileBlocks.put(fsKey, blocksForFs);
            }
            blocksForFs.addBlocks(fd.getFileBlocks(), Arrays.asList(blockLocations));
        }
    } catch (IOException e) {
        String message = "couldn't determine block locations for path '" + file.getPath() + "':\n"
                + e.getMessage();
        throw new RuntimeException(message, e);
    }
}

From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java

License:Apache License

/**
 * Test if it returns the same file descriptor when the filepath is a normal file with
 * cache./*from  w  w  w  .  j  a  v  a  2 s . c  om*/
 */
private void testFileWithCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();

    // Create old file description map
    Path cacheFilePath = createFileInHdfs("fileWithCache");
    Map<String, List<FileDescriptor>> oldFileDescMap = Maps.newHashMap();
    List<FileDescriptor> cacheList = new LinkedList<FileDescriptor>();
    FileStatus fileStatus = fs_.getFileStatus(cacheFilePath);
    FileDescriptor fdInCache = new FileDescriptor(cacheFilePath.getName(), fileStatus.getLen(),
            fileStatus.getModificationTime());
    cacheList.add(fdInCache);
    oldFileDescMap.put(fileStatus.getPath().getParent().toString(), cacheList);
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }

    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }
    assertEquals(1, fileDesclist.size());
    assertEquals(fdInCache, fileDesclist.get(0));
}

From source file:com.cloudera.oryx.common.servcomp.Store.java

License:Open Source License

/**
 * Recursively downloads the contents of a remote directory into a local directory.
 * Returns without doing anything if the remote location does not exist. If the
 * download does not run to completion, the local directory is deleted recursively.
 *
 * @param dirKey location of directory whose contents will be downloaded
 * @param dir local {@link File} to store files/directories under
 * @throws IOException if an error occurs while downloading
 */
public void downloadDirectory(String dirKey, File dir) throws IOException {
    Preconditions.checkNotNull(dirKey);
    Preconditions.checkNotNull(dir);
    Preconditions.checkArgument(dir.exists() && dir.isDirectory(), "Not a directory: %s", dir);

    Path remoteDir = Namespaces.toPath(dirKey);
    if (!fs.exists(remoteDir)) {
        return;
    }
    Preconditions.checkArgument(fs.getFileStatus(remoteDir).isDirectory(), "Not a directory: %s", remoteDir);

    boolean finished = false;
    try {
        for (FileStatus child : fs.listStatus(remoteDir)) {
            String childName = child.getPath().getName();
            String childKey = dirKey + '/' + childName;
            File localTarget = new File(dir, childName);

            if (child.isFile()) {
                download(childKey, localTarget);
                continue;
            }

            // mkdir() reports false for a directory that is already there, so only a
            // directory that is still missing afterwards counts as a failure.
            if (!localTarget.mkdir() && !localTarget.exists()) {
                throw new IOException("Can't make " + localTarget);
            }
            downloadDirectory(childKey, localTarget);
        }
        finished = true;
    } finally {
        // Reached with finished == false only when something above threw.
        if (!finished) {
            log.warn("Failed to download {} so deleting {}", dirKey, dir);
            IOUtils.deleteRecursively(dir);
        }
    }
}

From source file:com.cloudera.oryx.common.servcomp.Store.java

License:Open Source License

/**
 * Lists contents of a directory. For file systems without a notion of directory, this lists prefixes that
 * have the same prefix as the given prefix, but excludes "directories" (keys with same prefix, but followed
 * by more path elements). Results are returned in lexicographically sorted order. Entries whose path ends
 * with "_SUCCESS" are left out, and directory keys always end with "/". A location that does not exist
 * yields an empty list.
 *
 * @param prefix directory to list
 * @param files if true, only list files, not directories
 * @return list of keys representing directory contents
 * @throws IOException if one of the underlying file system calls fails
 */
public List<String> list(String prefix, boolean files) throws IOException {
    Preconditions.checkNotNull(prefix);
    Path path = Namespaces.toPath(prefix);
    if (!fs.exists(path)) {
        return Collections.emptyList();
    }
    Preconditions.checkArgument(fs.getFileStatus(path).isDirectory(), "Not a directory: %s", path);

    FileStatus[] entries = fs.listStatus(path, new FilesOrDirsPathFilter(fs, files));
    String namespacePrefix = Namespaces.get().getPrefix();

    List<String> keys = Lists.newArrayListWithCapacity(entries.length);
    for (FileStatus entry : entries) {
        String fullPath = entry.getPath().toString();
        Preconditions.checkState(fullPath.startsWith(namespacePrefix), "%s doesn't start with %s", fullPath,
                namespacePrefix);
        if (fullPath.endsWith("_SUCCESS")) {
            continue;
        }
        // Keys are reported relative to the namespace prefix.
        String key = fullPath.substring(namespacePrefix.length());
        boolean needsTrailingSlash = entry.isDirectory() && !key.endsWith("/");
        keys.add(needsTrailingSlash ? key + "/" : key);
    }
    Collections.sort(keys);
    return keys;
}

From source file:com.cloudera.oryx.lambda.batch.BatchUpdateFunction.java

License:Open Source License

/**
 * Qualifies the path of every given {@link FileStatus} against the file system, escapes
 * it with {@code StringUtils.escapeString} and concatenates the results.
 *
 * @param fs file system used to qualify each path
 * @param statuses statuses whose paths are joined
 * @return paths from {@link FileStatus}es into one comma-separated String
 * @see FileInputFormat#addInputPath(org.apache.hadoop.mapreduce.Job, Path)
 */
private static String joinFSPaths(FileSystem fs, FileStatus[] statuses) {
    StringBuilder buffer = new StringBuilder();
    for (int i = 0; i < statuses.length; i++) {
        Path qualified = fs.makeQualified(statuses[i].getPath());
        String escaped = StringUtils.escapeString(qualified.toString());
        // Separate from whatever has been written so far.
        if (buffer.length() > 0) {
            buffer.append(',');
        }
        buffer.append(escaped);
    }
    return buffer.toString();
}

From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

License:Open Source License

/**
 * <code>addFileMetadata</code> stores the pathname, size, owner, etc. of the given
 * file or directory as a new row of the Files table.
 */
void addFileMetadata(final FileStatus fstatus, final long crawlId) {
    // Compute strings to represent file metadata
    Path insertFile = fstatus.getPath();
    final boolean isDir = fstatus.isDir();
    FsPermission fsp = fstatus.getPermission();
    final String typeFlag = isDir ? "d" : "-";
    final String permissions = typeFlag + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
            + fsp.getOtherAction().SYMBOL;

    // Compute formal pathname representation
    final String fName;
    final String parentPath;
    Path parent = insertFile.getParent();
    if (!isDir || parent != null) {
        fName = insertFile.getName();
        String parentText = parent.toString();

        // REMIND --- mjc --- If we want to modify the Files table s.t. it does
        // not contain the filesystem prefix, then this would be the place to do it.

        parentPath = parentText.endsWith("/") ? parentText : parentText + "/";
    } else {
        // A directory without a parent: store its full path as the name.
        parentPath = "";
        fName = insertFile.toString();
    }

    // The insert itself is executed as a job on the database queue; this call waits
    // for the job's result via complete().
    final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
        protected Long job(SQLiteConnection db) throws SQLiteException {
            SQLiteStatement insert = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
            try {
                insert.bind(1, isDir ? "True" : "False");
                insert.bind(2, crawlId);
                insert.bind(3, fName);
                insert.bind(4, fstatus.getOwner());
                insert.bind(5, fstatus.getGroup());
                insert.bind(6, permissions);
                insert.bind(7, fstatus.getLen());
                insert.bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())));
                insert.bind(9, parentPath);
                insert.step();
                return db.getLastInsertId();
            } finally {
                insert.dispose();
            }
        }
    }).complete();
}

From source file:com.cloudera.recordbreaker.analyzer.FSCrawler.java

License:Open Source License

/**
 * Traverse a region of the filesystem rooted at <code>p</code> and build the lists of
 * paths found. This method only collects paths; it performs no analysis itself.
 * <ul>
 * <li>If <code>p</code> is not a directory it is appended to <code>todoFileList</code>.</li>
 * <li>If <code>p</code> is a directory and <code>subdirDepth</code> is non-zero, it is
 * appended to <code>todoDirList</code> and each of its entries is visited with
 * <code>subdirDepth - 1</code>.</li>
 * <li>If <code>p</code> is a directory and <code>subdirDepth</code> is zero, it is
 * skipped entirely.</li>
 * </ul>
 * A negative <code>subdirDepth</code> keeps decreasing and so effectively never
 * limits the depth.
 *
 * @param fs filesystem to traverse
 * @param p file or directory to start from
 * @param subdirDepth number of directory levels still allowed below <code>p</code>
 * @param crawlId passed through unchanged to the recursive calls
 * @param todoFileList receives every non-directory path found
 * @param todoDirList receives every directory that is descended into
 * @throws IOException if the status of <code>p</code> cannot be read or <code>p</code>
 *         cannot be listed; failures while visiting entries of <code>p</code> are
 *         printed and the affected entry is skipped
 */
protected void recursiveCrawlBuildList(FileSystem fs, Path p, int subdirDepth, long crawlId,
        List<Path> todoFileList, List<Path> todoDirList) throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    if (!fstatus.isDir()) {
        todoFileList.add(p);
        return;
    }
    if (subdirDepth == 0) {
        // Depth budget used up: the directory is neither recorded nor descended into.
        return;
    }

    todoDirList.add(p);
    for (FileStatus subfilestatus : fs.listStatus(p)) {
        Path subfile = subfilestatus.getPath();
        try {
            recursiveCrawlBuildList(fs, subfile, subdirDepth - 1, crawlId, todoFileList, todoDirList);
        } catch (IOException iex) {
            // Best effort: one unreadable entry must not abort the rest of the crawl.
            iex.printStackTrace();
        }
    }
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Resolves the paths to use for the given location. If <code>path</code> is a
 * directory, the paths of its entries accepted by a <code>HiddenPathFilter</code> are
 * returned; otherwise the result is a one-element array holding <code>path</code>.
 */
public static Path[] findVcfs(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.isDirectory(path)) {
        return new Path[] { path };
    }

    FileStatus[] entries = fs.listStatus(path, new HiddenPathFilter());
    Path[] vcfs = new Path[entries.length];
    for (int idx = 0; idx < entries.length; idx++) {
        vcfs[idx] = entries[idx].getPath();
    }
    return vcfs;
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Checks whether the given sample group occurs anywhere under <code>path</code>. The
 * search lists <code>path</code> with a "chr" partition filter, each result with a
 * "pos" partition filter, and each of those with a "sample_group" filter for
 * <code>sampleGroup</code>; the first non-empty innermost listing ends the search.
 *
 * @return true if a matching entry was found, false otherwise or if <code>path</code>
 *         does not exist
 */
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return false;
    }

    FileStatus[] chrEntries = fs.listStatus(path, new PartitionPathFilter("chr"));
    for (FileStatus chrEntry : chrEntries) {
        FileStatus[] posEntries = fs.listStatus(chrEntry.getPath(), new PartitionPathFilter("pos"));
        for (FileStatus posEntry : posEntries) {
            FileStatus[] matches = fs.listStatus(posEntry.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup));
            if (matches.length != 0) {
                return true;
            }
        }
    }
    return false;
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Recursively deletes every entry for the given sample group under <code>path</code>.
 * The entries are found by listing <code>path</code> with a "chr" partition filter,
 * each result with a "pos" partition filter, and each of those with a "sample_group"
 * filter for <code>sampleGroup</code>. Does nothing if <code>path</code> does not exist.
 * The boolean result of each delete call is not inspected.
 */
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        FileStatus[] chrEntries = fs.listStatus(path, new PartitionPathFilter("chr"));
        for (FileStatus chrEntry : chrEntries) {
            FileStatus[] posEntries = fs.listStatus(chrEntry.getPath(), new PartitionPathFilter("pos"));
            for (FileStatus posEntry : posEntries) {
                FileStatus[] doomed = fs.listStatus(posEntry.getPath(),
                        new PartitionPathFilter("sample_group", sampleGroup));
                for (FileStatus entry : doomed) {
                    fs.delete(entry.getPath(), true);
                }
            }
        }
    }
}