Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find usage examples for the org.apache.hadoop.fs.FileStatus.getPath() method.

Prototype

public Path getPath() 
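
Returns the Path of the file or directory that this FileStatus describes.

Before the real-world examples below, a minimal, self-contained sketch of the typical pattern (the directory being listed is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // List a directory and print the full Path of each entry.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}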

Usage

From source file: com.cloudera.impala.util.LoadMetadataUtil.java

License: Apache License

/**
 * Queries the filesystem to load the file block metadata (e.g. DFS blocks) for the
 * given file. Adds the newly created block metadata and block location to the
 * perFsFileBlocks, so that the storage IDs for each block can be retrieved from
 * BlockLocation.
 *
 * Must be threadsafe. Access to 'perFsFileBlocks' and 'hostIndex' must be protected.
 */
private static void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
        HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName,
        ListMap<TNetworkAddress> hostIndex) {
    Preconditions.checkNotNull(fd);
    Preconditions.checkNotNull(perFsFileBlocks);
    Preconditions.checkArgument(!file.isDirectory());
    LOG.debug("load block md for " + tblName + " file " + fd.getFileName());

    if (!FileSystemUtil.hasGetFileBlockLocations(fs)) {
        synthesizeBlockMetadata(file, fd, fileFormat, hostIndex);
        return;
    }
    try {
        BlockLocation[] locations = null;
        if (file instanceof LocatedFileStatus) {
            locations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
            locations = fs.getFileBlockLocations(file, 0, file.getLen());
        }
        Preconditions.checkNotNull(locations);

        // Loop over all blocks in the file.
        for (BlockLocation loc : locations) {
            Preconditions.checkNotNull(loc);
            fd.addFileBlock(createFileBlock(loc, hostIndex));
        }

        // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the
        // blocks are collected, the disk IDs will be queried in one batch per filesystem.
        FsKey fsKey = new FsKey(fs);
        synchronized (perFsFileBlocks) {
            FileBlocksInfo infos = perFsFileBlocks.get(fsKey);
            if (infos == null) {
                infos = new FileBlocksInfo();
                perFsFileBlocks.put(fsKey, infos);
            }
            infos.addBlocks(fd.getFileBlocks(), Arrays.asList(locations));
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "couldn't determine block locations for path '" + file.getPath() + "':\n" + e.getMessage(), e);
    }
}
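
Note that when the FileStatus comes from FileSystem#listLocatedStatus it is a LocatedFileStatus that already carries its block locations, which is why the instanceof check above can skip the extra getFileBlockLocations() round trip to the NameNode.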

From source file: com.cloudera.impala.util.TestLoadMetadataUtil.java

License: Apache License

/**
 * Test that the same file descriptor is returned when the file path is a normal
 * file that is already present in the cache.
 */
private void testFileWithCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();

    // Create old file description map
    Path cacheFilePath = createFileInHdfs("fileWithCache");
    Map<String, List<FileDescriptor>> oldFileDescMap = Maps.newHashMap();
    List<FileDescriptor> cacheList = new LinkedList<FileDescriptor>();
    FileStatus fileStatus = fs_.getFileStatus(cacheFilePath);
    FileDescriptor fdInCache = new FileDescriptor(cacheFilePath.getName(), fileStatus.getLen(),
            fileStatus.getModificationTime());
    cacheList.add(fdInCache);
    oldFileDescMap.put(fileStatus.getPath().getParent().toString(), cacheList);
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadViaLocatedFileStatus(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadViaListStatusIterator(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }

    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }
    assertEquals(1, fileDesclist.size());
    assertEquals(fdInCache, fileDesclist.get(0));
}

From source file: com.cloudera.oryx.common.servcomp.Store.java

License: Open Source License

/**
 * @param dirKey location of directory whose contents will be downloaded
 * @param dir local {@link File} to store files/directories under
 * @throws IOException if an error occurs while downloading
 */
public void downloadDirectory(String dirKey, File dir) throws IOException {
    Preconditions.checkNotNull(dirKey);
    Preconditions.checkNotNull(dir);
    Preconditions.checkArgument(dir.exists() && dir.isDirectory(), "Not a directory: %s", dir);

    Path dirPath = Namespaces.toPath(dirKey);
    if (!fs.exists(dirPath)) {
        return;
    }
    Preconditions.checkArgument(fs.getFileStatus(dirPath).isDirectory(), "Not a directory: %s", dirPath);

    boolean complete = false;
    try {
        for (FileStatus status : fs.listStatus(dirPath)) {
            String name = status.getPath().getName();
            String fromKey = dirKey + '/' + name;
            File toLocal = new File(dir, name);
            if (status.isFile()) {
                download(fromKey, toLocal);
            } else {
                boolean success = toLocal.mkdir();
                if (!success && !toLocal.exists()) {
                    throw new IOException("Can't make " + toLocal);
                }
                downloadDirectory(fromKey, toLocal);
            }
        }
        complete = true;
    } finally {
        if (!complete) {
            log.warn("Failed to download {} so deleting {}", dirKey, dir);
            IOUtils.deleteRecursively(dir);
        }
    }
}
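
A hedged usage sketch (the Store accessor, key, and local directory are hypothetical):

Store store = Store.get(); // hypothetical accessor for the configured Store
store.downloadDirectory("models/generation-00000", new File("/tmp/models"));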

From source file: com.cloudera.oryx.common.servcomp.Store.java

License: Open Source License

/**
 * Lists the contents of a directory. For file systems without a notion of directory, this lists keys that
 * share the given prefix but excludes "directories" (keys with the same prefix followed by further path
 * elements). Results are returned in lexicographically sorted order.
 *
 * @param prefix directory to list
 * @param files if true, only list files, not directories
 * @return list of keys representing directory contents
 */
public List<String> list(String prefix, boolean files) throws IOException {
    Preconditions.checkNotNull(prefix);
    Path path = Namespaces.toPath(prefix);
    if (!fs.exists(path)) {
        return Collections.emptyList();
    }

    Preconditions.checkArgument(fs.getFileStatus(path).isDirectory(), "Not a directory: %s", path);
    FileStatus[] statuses = fs.listStatus(path, new FilesOrDirsPathFilter(fs, files));
    String prefixString = Namespaces.get().getPrefix();

    List<String> result = Lists.newArrayListWithCapacity(statuses.length);
    for (FileStatus fileStatus : statuses) {
        String listPath = fileStatus.getPath().toString();
        Preconditions.checkState(listPath.startsWith(prefixString), "%s doesn't start with %s", listPath,
                prefixString);
        if (!listPath.endsWith("_SUCCESS")) {
            listPath = listPath.substring(prefixString.length());
            if (fileStatus.isDirectory() && !listPath.endsWith("/")) {
                listPath += "/";
            }
            result.add(listPath);
        }
    }
    Collections.sort(result);
    return result;
}
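
The _SUCCESS check above skips the zero-byte marker file that Hadoop's FileOutputCommitter writes into an output directory when a job completes successfully, so job-completion markers never show up as directory contents.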

From source file: com.cloudera.oryx.lambda.batch.BatchUpdateFunction.java

License: Open Source License

/**
 * @return paths from the given {@link FileStatus}es joined into one comma-separated String
 * @see FileInputFormat#addInputPath(org.apache.hadoop.mapreduce.Job, Path)
 */
private static String joinFSPaths(FileSystem fs, FileStatus[] statuses) {
    StringBuilder joined = new StringBuilder();
    for (FileStatus status : statuses) {
        if (joined.length() > 0) {
            joined.append(',');
        }
        Path path = fs.makeQualified(status.getPath());
        joined.append(StringUtils.escapeString(path.toString()));
    }
    return joined.toString();
}
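
The joined string is in the comma-separated form that FileInputFormat accepts. A hedged sketch of how it might be consumed from within the same class (the configuration and input directory are assumptions):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Job job = Job.getInstance(conf, "batch-update"); // hypothetical job name
FileInputFormat.addInputPaths(job, joinFSPaths(fs, fs.listStatus(new Path("/data/input"))));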

From source file: com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

License: Open Source License

/**
 * <code>addFileMetadata</code> stores the pathname, size, owner, etc.
 */
void addFileMetadata(final FileStatus fstatus, final long crawlId) {
    // Compute strings to represent file metadata
    Path insertFile = fstatus.getPath();
    final boolean isDir = fstatus.isDir();
    FsPermission fsp = fstatus.getPermission();
    final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
            + fsp.getOtherAction().SYMBOL;

    // Compute formal pathname representation
    String fnameString = null;
    String parentPathString = null;
    if (isDir && insertFile.getParent() == null) {
        parentPathString = "";
        fnameString = insertFile.toString();
    } else {
        fnameString = insertFile.getName();
        parentPathString = insertFile.getParent().toString();

        // REMIND --- mjc --- If we want to modify the Files table s.t. it does
        // not contain the filesystem prefix, then this would be the place to do it.

        if (!parentPathString.endsWith("/")) {
            parentPathString = parentPathString + "/";
        }
    }
    final String parentPath = parentPathString;
    final String fName = fnameString;
    final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
        protected Long job(SQLiteConnection db) throws SQLiteException {
            SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
            try {
                stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName)
                        .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions)
                        .bind(7, fstatus.getLen())
                        .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())))
                        .bind(9, parentPath);
                stmt.step();
                return db.getLastInsertId();
            } finally {
                stmt.dispose();
            }
        }
    }).complete();
}

From source file: com.cloudera.recordbreaker.analyzer.FSCrawler.java

License: Open Source License

/**
 * Traverse an entire region of the filesystem, analyzing files.
 * This code should:
 * a) Navigate the directory hierarchy
 * b) Run analysis code to figure out the file details
 * c) Invoke addSingleFile() appropriately.
 */
protected void recursiveCrawlBuildList(FileSystem fs, Path p, int subdirDepth, long crawlId,
        List<Path> todoFileList, List<Path> todoDirList) throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    if (!fstatus.isDir()) {
        todoFileList.add(p);
    } else {
        if (subdirDepth != 0) {
            todoDirList.add(p);
            for (FileStatus subfilestatus : fs.listStatus(p)) {
                Path subfile = subfilestatus.getPath();
                try {
                    recursiveCrawlBuildList(fs, subfile, subdirDepth - 1, crawlId, todoFileList, todoDirList);
                } catch (IOException iex) {
                    iex.printStackTrace();
                }
            }
        }
    }
}

From source file: com.cloudera.science.quince.FileUtils.java

License: Open Source License

public static Path[] findVcfs(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.isDirectory(path)) {
        FileStatus[] fileStatuses = fs.listStatus(path, new HiddenPathFilter());
        Path[] vcfs = new Path[fileStatuses.length];
        int i = 0;
        for (FileStatus status : fileStatuses) {
            vcfs[i++] = status.getPath();
        }
        return vcfs;
    } else {
        return new Path[] { path };
    }
}
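
Hadoop also ships a helper that performs the same FileStatus[]-to-Path[] mapping, so under the assumption that only the mapping is needed, the loop above could be replaced with a one-liner:

// Equivalent mapping using org.apache.hadoop.fs.FileUtil
Path[] vcfs = FileUtil.stat2Paths(fs.listStatus(path, new HiddenPathFilter()));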

From source file: com.cloudera.science.quince.FileUtils.java

License: Open Source License

public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return false;
    }
    for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) {
        for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) {
            if (fs.listStatus(posStatus.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup)).length > 0) {
                return true;
            }
        }
    }
    return false;
}

From source file: com.cloudera.science.quince.FileUtils.java

License: Open Source License

public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return;
    }
    for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) {
        for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) {
            for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup))) {
                fs.delete(sampleGroupStatus.getPath(), true);
            }
        }
    }
}