List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
public Path getPath()
From source file:com.cloudera.impala.util.LoadMetadataUtil.java
License:Apache License
/** * Queries the filesystem to load the file block metadata (e.g. DFS blocks) for the * given file. Adds the newly created block metadata and block location to the * perFsFileBlocks, so that the storage IDs for each block can be retrieved from * BlockLocation.//from w ww . j ava 2 s.c o m * * Must be threadsafe. Access to 'perFsFileBlocks' and 'hostIndex' must be protected. */ private static void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd, HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName, ListMap<TNetworkAddress> hostIndex) { Preconditions.checkNotNull(fd); Preconditions.checkNotNull(perFsFileBlocks); Preconditions.checkArgument(!file.isDirectory()); LOG.debug("load block md for " + tblName + " file " + fd.getFileName()); if (!FileSystemUtil.hasGetFileBlockLocations(fs)) { synthesizeBlockMetadata(file, fd, fileFormat, hostIndex); return; } try { BlockLocation[] locations = null; if (file instanceof LocatedFileStatus) { locations = ((LocatedFileStatus) file).getBlockLocations(); } else { locations = fs.getFileBlockLocations(file, 0, file.getLen()); } Preconditions.checkNotNull(locations); // Loop over all blocks in the file. for (BlockLocation loc : locations) { Preconditions.checkNotNull(loc); fd.addFileBlock(createFileBlock(loc, hostIndex)); } // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the // blocks are collected, the disk IDs will be queried in one batch per filesystem. FsKey fsKey = new FsKey(fs); synchronized (perFsFileBlocks) { FileBlocksInfo infos = perFsFileBlocks.get(fsKey); if (infos == null) { infos = new FileBlocksInfo(); perFsFileBlocks.put(fsKey, infos); } infos.addBlocks(fd.getFileBlocks(), Arrays.asList(locations)); } } catch (IOException e) { throw new RuntimeException( "couldn't determine block locations for path '" + file.getPath() + "':\n" + e.getMessage(), e); } }
From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java
License:Apache License
/** * Test if it returns the same file descriptor when the filepath is a normal file with * cache./*from w w w . j a v a 2 s . c om*/ */ private void testFileWithCache(MethodName methodName) throws IOException { Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap(); Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap(); // Create old file description map Path cacheFilePath = createFileInHdfs("fileWithCache"); Map<String, List<FileDescriptor>> oldFileDescMap = Maps.newHashMap(); List<FileDescriptor> cacheList = new LinkedList<FileDescriptor>(); FileStatus fileStatus = fs_.getFileStatus(cacheFilePath); FileDescriptor fdInCache = new FileDescriptor(cacheFilePath.getName(), fileStatus.getLen(), fileStatus.getModificationTime()); cacheList.add(fdInCache); oldFileDescMap.put(fileStatus.getPath().getParent().toString(), cacheList); List<FileDescriptor> fileDesclist = null; switch (methodName) { case LOAD_FILE_DESCRIPTORS: fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap, HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap); break; case LOAD_VIA_LOCATED_FILE_STATUS: fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap, HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap); break; case LOAD_VIA_LIST_STATUS_ITERATOR: fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap, HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap); break; default: LOG.error("Unsupported enum method name"); Preconditions.checkState(false); } for (FsKey key : perFsFileBlocks.keySet()) { assertEquals(HDFS_BASE_PATH, key.toString()); } assertEquals(1, fileDesclist.size()); assertEquals(fdInCache, fileDesclist.get(0)); }
From source file:com.cloudera.oryx.common.servcomp.Store.java
License:Open Source License
/**
 * Recursively downloads the contents of a remote directory into a local directory.
 * Nothing is downloaded when the remote location does not exist. If the download does
 * not run to completion, a warning is logged and {@code dir} is deleted recursively.
 *
 * @param dirKey location of directory whose contents will be downloaded
 * @param dir local {@link File} to store files/directories under
 * @throws IOException if an error occurs while downloading
 */
public void downloadDirectory(String dirKey, File dir) throws IOException {
  Preconditions.checkNotNull(dirKey);
  Preconditions.checkNotNull(dir);
  Preconditions.checkArgument(dir.exists() && dir.isDirectory(), "Not a directory: %s", dir);

  Path remoteDir = Namespaces.toPath(dirKey);
  if (!fs.exists(remoteDir)) {
    return;
  }
  Preconditions.checkArgument(fs.getFileStatus(remoteDir).isDirectory(), "Not a directory: %s", remoteDir);

  boolean finished = false;
  try {
    for (FileStatus entry : fs.listStatus(remoteDir)) {
      String entryName = entry.getPath().getName();
      String entryKey = dirKey + '/' + entryName;
      File localTarget = new File(dir, entryName);

      if (!entry.isFile()) {
        // mkdir() also returns false when the directory is already there, which is fine.
        if (!localTarget.mkdir() && !localTarget.exists()) {
          throw new IOException("Can't make " + localTarget);
        }
        downloadDirectory(entryKey, localTarget);
        continue;
      }
      download(entryKey, localTarget);
    }
    finished = true;
  } finally {
    // Reached with finished == false only when the loop was abandoned part-way.
    if (!finished) {
      log.warn("Failed to download {} so deleting {}", dirKey, dir);
      IOUtils.deleteRecursively(dir);
    }
  }
}
From source file:com.cloudera.oryx.common.servcomp.Store.java
License:Open Source License
/**
 * Lists contents of a directory. For file systems without a notion of directory, this lists prefixes that
 * have the same prefix as the given prefix, but excludes "directories" (keys with same prefix, but followed
 * by more path elements). Results are returned in lexicographically sorted order.
 *
 * Entries whose path ends with "_SUCCESS" are left out, the namespace prefix is stripped from every
 * returned key, and directory keys always end with "/". A nonexistent location yields an empty list.
 *
 * @param prefix directory to list
 * @param files if true, only list files, not directories
 * @return list of keys representing directory contents
 * @throws IOException if the underlying file system cannot be queried
 */
public List<String> list(String prefix, boolean files) throws IOException {
  Preconditions.checkNotNull(prefix);

  Path dirPath = Namespaces.toPath(prefix);
  if (!fs.exists(dirPath)) {
    return Collections.emptyList();
  }
  Preconditions.checkArgument(fs.getFileStatus(dirPath).isDirectory(), "Not a directory: %s", dirPath);

  FileStatus[] entries = fs.listStatus(dirPath, new FilesOrDirsPathFilter(fs, files));
  String namespacePrefix = Namespaces.get().getPrefix();
  List<String> keys = Lists.newArrayListWithCapacity(entries.length);

  for (FileStatus entry : entries) {
    String fullPath = entry.getPath().toString();
    Preconditions.checkState(fullPath.startsWith(namespacePrefix),
        "%s doesn't start with %s", fullPath, namespacePrefix);
    if (fullPath.endsWith("_SUCCESS")) {
      continue;
    }
    String key = fullPath.substring(namespacePrefix.length());
    if (entry.isDirectory() && !key.endsWith("/")) {
      key += "/";
    }
    keys.add(key);
  }

  Collections.sort(keys);
  return keys;
}
From source file:com.cloudera.oryx.lambda.batch.BatchUpdateFunction.java
License:Open Source License
/**
 * Qualifies the path of every status against {@code fs}, escapes it with
 * {@code StringUtils.escapeString} and concatenates the results.
 *
 * @param fs file system used to qualify each path
 * @param statuses statuses whose paths are joined, in array order
 * @return paths from {@link FileStatus}es into one comma-separated String
 * @see FileInputFormat#addInputPath(org.apache.hadoop.mapreduce.Job, Path)
 */
private static String joinFSPaths(FileSystem fs, FileStatus[] statuses) {
  StringBuilder buffer = new StringBuilder();
  for (int i = 0; i < statuses.length; i++) {
    // A separator is only needed once something has been written.
    if (buffer.length() != 0) {
      buffer.append(',');
    }
    String qualified = fs.makeQualified(statuses[i].getPath()).toString();
    buffer.append(StringUtils.escapeString(qualified));
  }
  return buffer.toString();
}
From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java
License:Open Source License
/** * <code>addFileMetadata</code> stores the pathname, size, owner, etc. *///from w w w .j a v a2 s. co m void addFileMetadata(final FileStatus fstatus, final long crawlId) { // Compute strings to represent file metadata Path insertFile = fstatus.getPath(); final boolean isDir = fstatus.isDir(); FsPermission fsp = fstatus.getPermission(); final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL + fsp.getOtherAction().SYMBOL; // Compute formal pathname representation String fnameString = null; String parentPathString = null; if (isDir && insertFile.getParent() == null) { parentPathString = ""; fnameString = insertFile.toString(); } else { fnameString = insertFile.getName(); parentPathString = insertFile.getParent().toString(); // REMIND --- mjc --- If we want to modify the Files table s.t. it does // not contain the filesystem prefix, then this would be the place to do it. if (!parentPathString.endsWith("/")) { parentPathString = parentPathString + "/"; } } final String parentPath = parentPathString; final String fName = fnameString; final long fileId = dbQueue.execute(new SQLiteJob<Long>() { protected Long job(SQLiteConnection db) throws SQLiteException { SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); try { stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName) .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions) .bind(7, fstatus.getLen()) .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime()))) .bind(9, parentPath); stmt.step(); return db.getLastInsertId(); } finally { stmt.dispose(); } } }).complete(); }
From source file:com.cloudera.recordbreaker.analyzer.FSCrawler.java
License:Open Source License
/**
 * Traverse an entire region of the filesystem, collecting what has to be analyzed.
 *
 * Starting at {@code p}, every non-directory encountered is appended to
 * {@code todoFileList} and every directory that is descended into is appended to
 * {@code todoDirList}. A directory reached with {@code subdirDepth == 0} is neither
 * recorded nor descended into. Each level of recursion decrements {@code subdirDepth},
 * so a negative starting value does not hit zero on the way down and therefore does not
 * limit the depth.
 *
 * @param fs filesystem to query
 * @param p file or directory to start from
 * @param subdirDepth remaining number of directory levels to descend; negative for no limit
 * @param crawlId id of the current crawl; only passed along to the recursive calls
 * @param todoFileList receives the files found
 * @param todoDirList receives the directories that were descended into
 * @throws IOException if the status of {@code p} or its listing cannot be obtained
 */
protected void recursiveCrawlBuildList(FileSystem fs, Path p, int subdirDepth, long crawlId,
    List<Path> todoFileList, List<Path> todoDirList) throws IOException {
  FileStatus fstatus = fs.getFileStatus(p);
  if (!fstatus.isDir()) {
    todoFileList.add(p);
    return;
  }
  if (subdirDepth == 0) {
    // Depth budget exhausted: skip this directory entirely.
    return;
  }

  todoDirList.add(p);
  for (FileStatus subfilestatus : fs.listStatus(p)) {
    Path subfile = subfilestatus.getPath();
    try {
      recursiveCrawlBuildList(fs, subfile, subdirDepth - 1, crawlId, todoFileList, todoDirList);
    } catch (IOException iex) {
      // Best effort: a failure on one entry is reported and the crawl continues with
      // the remaining siblings.
      iex.printStackTrace();
    }
  }
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
/**
 * Resolves the input paths for {@code path}. When {@code path} is a directory, the
 * result holds the path of every entry of its listing that is accepted by a
 * {@code HiddenPathFilter}; otherwise the result is {@code path} itself.
 *
 * @param path file or directory to resolve
 * @param conf configuration used to obtain the file system of {@code path}
 * @return the resolved paths
 * @throws IOException if the file system cannot be accessed
 */
public static Path[] findVcfs(Path path, Configuration conf) throws IOException {
  FileSystem fileSystem = path.getFileSystem(conf);
  if (!fileSystem.isDirectory(path)) {
    return new Path[] { path };
  }

  FileStatus[] children = fileSystem.listStatus(path, new HiddenPathFilter());
  Path[] result = new Path[children.length];
  for (int idx = 0; idx < children.length; idx++) {
    result[idx] = children[idx].getPath();
  }
  return result;
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
/**
 * Checks whether the given sample group is present anywhere below {@code path}.
 * The tree is walked through three levels of listings, filtered by
 * {@code PartitionPathFilter} for "chr", then "pos", then "sample_group" with the given
 * value; the first non-empty match at the last level ends the search.
 *
 * @param path root directory to search
 * @param conf configuration used to obtain the file system of {@code path}
 * @param sampleGroup sample group to look for
 * @return true if at least one matching entry exists; false otherwise, including when
 *     {@code path} does not exist
 * @throws IOException if the file system cannot be accessed
 */
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup)
    throws IOException {
  FileSystem fileSystem = path.getFileSystem(conf);
  if (!fileSystem.exists(path)) {
    return false;
  }

  for (FileStatus chrDir : fileSystem.listStatus(path, new PartitionPathFilter("chr"))) {
    for (FileStatus posDir : fileSystem.listStatus(chrDir.getPath(), new PartitionPathFilter("pos"))) {
      FileStatus[] matches = fileSystem.listStatus(posDir.getPath(),
          new PartitionPathFilter("sample_group", sampleGroup));
      if (matches.length != 0) {
        return true;
      }
    }
  }
  return false;
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException { FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { return;//from w w w. java 2s .c o m } for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) { for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) { for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(), new PartitionPathFilter("sample_group", sampleGroup))) { fs.delete(sampleGroupStatus.getPath(), true); } } } }