List of usage examples for org.apache.hadoop.fs.FileStatus.isDirectory()
public boolean isDirectory()
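Before the project-level examples below, a minimal self-contained sketch of the call, assuming a default Configuration and a reachable file system; the path used here is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; replace with one that exists on your file system.
        Path path = new Path("/tmp/example");
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            System.out.println(path + " is a directory");
        } else {
            System.out.println(path + " is a file of length " + status.getLen());
        }
    }
}

Note that getFileStatus throws FileNotFoundException when the path does not exist, so callers typically guard with fs.exists(path) or catch the exception.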
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperator.java
License:Apache License
/**
 * Scans the directory for new files.
 */
protected void scanDirectory() {
    if (System.currentTimeMillis() - scanIntervalMillis >= lastScanMillis) {
        Set<Path> newPaths = scanner.scan(fs, filePath, processedFiles);
        for (Path newPath : newPaths) {
            try {
                FileStatus fileStatus = fs.getFileStatus(newPath);
                if (fileStatus.isDirectory()) {
                    checkVisitedDirectory(newPath);
                } else {
                    String newPathString = newPath.toString();
                    pendingFiles.add(newPathString);
                    processedFiles.add(newPathString);
                    localProcessedFileCount.increment();
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        lastScanMillis = System.currentTimeMillis();
    }
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileSplitter.java
License:Apache License
/**
 * Creates file-metadata and populates the number of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    LOG.debug("file {}", fileInfo.getFilePath());
    FileMetadata fileMetadata = createFileMetadata(fileInfo);
    Path path = new Path(fileInfo.getFilePath());

    fileMetadata.setFileName(path.getName());

    FileStatus status = getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (fileInfo.getDirectoryPath() == null) { // Direct filename is given as input.
        fileMetadata.setRelativePath(status.getPath().getName());
    } else {
        String relativePath = getRelativePathWithFolderName(fileInfo);
        fileMetadata.setRelativePath(relativePath);
    }

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}
From source file:org.apache.apex.malhar.lib.io.fs.FileSplitter.java
License:Apache License
/**
 * Creates file-metadata and populates the number of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    String filePathStr = fileInfo.getFilePath();
    LOG.debug("file {}", filePathStr);
    FileMetadata fileMetadata = new FileMetadata(filePathStr);
    Path path = new Path(filePathStr);

    fileMetadata.setFileName(path.getName());

    FileStatus status = fs.getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}
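Both splitter examples above compute the block count with the same floor-plus-remainder idiom. A hedged equivalent, writing len for status.getLen() and assuming len >= 0 and blockSize > 0, is ordinary ceiling division:

// Equivalent ceiling division; assumes len >= 0, blockSize > 0,
// and that len + blockSize - 1 does not overflow a long.
int noOfBlocks = (int) ((len + blockSize - 1) / blockSize);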
From source file:org.apache.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/**
 * Here I am just pointing the Containers to the existing HDFS resources given by the Client
 * filesystem of the nodes.
 *
 * @throws IOException
 */
private void localizeDFSResources() throws IOException {
    // if performing an 'offline' task, skip a lot of resource distribution
    if (obliterate || backup || restore) {
        if (appMasterJar == null || ("").equals(appMasterJar)) {
            // this can happen in a jUnit testing environment. we don't need to set it there.
            if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
                throw new IllegalStateException("AM jar not provided in environment.");
            } else {
                return;
            }
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar);
        LocalResource obliteratorJar = Records.newRecord(LocalResource.class);
        obliteratorJar.setType(LocalResourceType.FILE);
        obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE);
        obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar));
        obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime());
        obliteratorJar.setSize(appMasterJarStatus.getLen());
        localResources.put("asterix-yarn.jar", obliteratorJar);
        LOG.info(localResources.values());
        return;
    }

    // otherwise, distribute everything to start up asterix
    LocalResource asterixZip = Records.newRecord(LocalResource.class);

    // this un-tar's the asterix distribution
    asterixZip.setType(LocalResourceType.ARCHIVE);
    asterixZip.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath);
        throw new IOException(e);
    }

    asterixZip.setTimestamp(asterixZipTimestamp);
    asterixZip.setSize(asterixZipLen);
    localResources.put(ASTERIX_ZIP_NAME, asterixZip);

    // now let's do the same for the cluster description XML
    LocalResource asterixConf = Records.newRecord(LocalResource.class);
    asterixConf.setType(LocalResourceType.FILE);
    asterixConf.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath);
        throw new IOException(e);
    }
    // TODO: I could avoid localizing this everywhere by only calling this block on the metadata node.
    asterixConf.setTimestamp(asterixConfTimestamp);
    asterixConf.setSize(asterixConfLen);
    localResources.put("cluster-config.xml", asterixConf);

    // now add the libraries if there are any
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR);
        if (fs.exists(p)) {
            FileStatus[] dataverses = fs.listStatus(p);
            for (FileStatus d : dataverses) {
                if (!d.isDirectory()) {
                    throw new IOException("Library configuration directory structure is incorrect");
                }
                FileStatus[] libraries = fs.listStatus(d.getPath());
                for (FileStatus l : libraries) {
                    if (l.isDirectory()) {
                        throw new IOException("Library configuration directory structure is incorrect");
                    }
                    LocalResource lr = Records.newRecord(LocalResource.class);
                    lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri()));
                    lr.setSize(l.getLen());
                    lr.setTimestamp(l.getModificationTime());
                    lr.setType(LocalResourceType.ARCHIVE);
                    lr.setVisibility(LocalResourceVisibility.PRIVATE);
                    localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR
                            + l.getPath().getName().split("\\.")[0], lr);
                    LOG.info("Found library: " + l.getPath().toString());
                    LOG.info(l.getPath().getName());
                }
            }
        }
    } catch (FileNotFoundException e) {
        LOG.info("No external libraries present");
        // do nothing, it just means there aren't libraries. that is possible and ok
        // it should be handled by the fs.exists(p) check though.
    }
    LOG.info(localResources.values());
}
From source file:org.apache.asterix.metadata.utils.ExternalIndexingOperations.java
License:Apache License
private static void handleFile(Dataset dataset, List<ExternalFile> files, FileSystem fs,
        FileStatus fileStatus, int nextFileNumber) throws IOException {
    if (fileStatus.isDirectory()) {
        listSubFiles(dataset, fs, fileStatus, files);
    } else {
        files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), nextFileNumber,
                fileStatus.getPath().toUri().getPath(), new Date(fileStatus.getModificationTime()),
                fileStatus.getLen(), ExternalFilePendingOp.NO_OP));
    }
}
From source file:org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java
License:Apache License
private Set<Metadata> matchRecursiveGlob(String directorySpec, String fileSpec) throws IOException {
    final org.apache.hadoop.fs.FileSystem fs = new Path(directorySpec).getFileSystem(configuration);
    Set<Metadata> metadata = new HashSet<>();
    if (directorySpec.contains("*")) {
        // An abstract directory with a wildcard is converted to concrete directories to search.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }
    } else {
        // A concrete directory is searched.
        FileStatus[] fileStatuses = fs.globStatus(new Path(directorySpec + "/" + fileSpec));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                metadata.add(toMetadata(fileStatus));
            }
        }

        // All sub-directories of a concrete directory are searched.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec + "/*"));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }

        // Handle additional instances of recursive globs.
        if (fileSpec.contains("**")) {
            int index = fileSpec.indexOf("**");
            metadata.addAll(matchRecursiveGlob(directorySpec + "/" + fileSpec.substring(0, index + 1),
                    fileSpec.substring(index + 1)));
        }
    }
    return metadata;
}
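A stripped-down sketch of the globbing step the Beam matcher builds on, assuming a default Configuration; the glob pattern here is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Expand a glob and keep only the directories, as the matcher above does.
FileSystem fs = FileSystem.get(new Configuration());
FileStatus[] matches = fs.globStatus(new Path("/data/logs/2024-*"));
if (matches != null) { // globStatus returns null when the pattern's parent path does not exist
    for (FileStatus status : matches) {
        if (status.isDirectory()) {
            System.out.println("directory: " + status.getPath());
        }
    }
}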
From source file:org.apache.camel.component.hdfs2.HdfsConsumer.java
License:Apache License
private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException {
    if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDirectory()) {
        Path successPath = new Path(status.getPath().toString() + "/_SUCCESS");
        if (!info.getFileSystem().exists(successPath)) {
            return true;
        }
    }
    return false;
}
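The _SUCCESS marker tested above is the zero-length file that Hadoop's FileOutputCommitter drops into a job's output directory on successful completion (controlled by the mapreduce.fileoutputcommitter.marksuccessfuljobs setting). A sketch of writing such a marker by hand, assuming fs is an open FileSystem and outputDir an existing directory Path:

// Hypothetical manual marker; FileOutputCommitter normally writes this itself.
Path marker = new Path(outputDir, "_SUCCESS");
fs.create(marker, true).close(); // zero-length file whose presence signals a complete job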
From source file:org.apache.distributedlog.fs.TestDLFileSystem.java
License:Apache License
@Test
public void testListStatuses() throws Exception {
    Path parentPath = new Path("/path/to/" + runtime.getMethodName());
    assertFalse(fs.exists(parentPath));
    try (FSDataOutputStream parentOut = fs.create(parentPath)) {
        parentOut.writeBytes("parent");
        parentOut.flush();
    }
    assertTrue(fs.exists(parentPath));

    int numLogs = 3;
    for (int i = 0; i < numLogs; i++) {
        Path path = new Path("/path/to/" + runtime.getMethodName() + "/" + runtime.getMethodName() + "-" + i);
        assertFalse(fs.exists(path));
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeBytes("line");
            out.flush();
        }
        assertTrue(fs.exists(path));
    }

    FileStatus[] files = fs.listStatus(new Path("/path/to/" + runtime.getMethodName()));
    assertEquals(3, files.length);
    for (int i = 0; i < numLogs; i++) {
        FileStatus file = files[i];
        assertEquals(4, file.getLen());
        assertFalse(file.isDirectory());
        assertEquals(3, file.getReplication());
        assertEquals(0L, file.getModificationTime());
        assertEquals(new Path("/path/to/" + runtime.getMethodName() + "/" + runtime.getMethodName() + "-" + i),
                file.getPath());
    }
}
From source file:org.apache.drill.exec.expr.fn.registry.RemoteFunctionRegistry.java
License:Apache License
/**
 * Concatenates udf area with root directory.
 * Creates udf area, if area does not exist.
 * Checks that the area exists, is a directory, and is writable for the current user;
 * throws {@link DrillRuntimeException} otherwise.
 *
 * @param fs file system where area should be created or checked
 * @param root root directory
 * @param directory directory path
 * @return path to area
 */
private Path createArea(FileSystem fs, String root, String directory) {
    Path path = new Path(new File(root, directory).toURI().getPath());
    String fullPath = path.toUri().getPath();
    try {
        fs.mkdirs(path);
        Preconditions.checkState(fs.exists(path), "Area [%s] must exist", fullPath);
        FileStatus fileStatus = fs.getFileStatus(path);
        Preconditions.checkState(fileStatus.isDirectory(), "Area [%s] must be a directory", fullPath);
        FsPermission permission = fileStatus.getPermission();
        // It is considered that process user has write rights on directory if:
        // 1. process user is owner of the directory and has write rights
        // 2. process user is in group that has write rights
        // 3. any user has write rights
        Preconditions.checkState(
                (ImpersonationUtil.getProcessUserName().equals(fileStatus.getOwner())
                        && permission.getUserAction().implies(FsAction.WRITE))
                        || (Sets.newHashSet(ImpersonationUtil.getProcessUserGroupNames())
                                .contains(fileStatus.getGroup())
                                && permission.getGroupAction().implies(FsAction.WRITE))
                        || permission.getOtherAction().implies(FsAction.WRITE),
                "Area [%s] must be writable and executable for application user", fullPath);
    } catch (Exception e) {
        if (e instanceof DrillRuntimeException) {
            throw (DrillRuntimeException) e;
        }
        // throws
        DrillRuntimeException.format(e, "Error during udf area creation [%s] on file system [%s]",
                fullPath, fs.getUri());
    }
    logger.info("Created remote udf area [{}] on file system [{}]", fullPath, fs.getUri());
    return path;
}
From source file:org.apache.drill.exec.planner.sql.handlers.AnalyzeTableHandler.java
License:Apache License
private boolean tableModified(DrillFileSystem fs, Path parentPath, long statsModificationTime)
        throws IOException {
    for (final FileStatus file : fs.listStatus(parentPath)) {
        // If the directory or files within it are modified
        if (file.getModificationTime() > statsModificationTime) {
            return true;
        }
        // For a directory, we should recursively check sub-directories
        if (file.isDirectory() && tableModified(fs, file.getPath(), statsModificationTime)) {
            return true;
        }
    }
    return false;
}
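The recursion above is the general depth-first walk that most of these examples specialize. A self-contained sketch of that pattern, assuming only a default Configuration; the starting directory is hypothetical:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecursiveWalk {
    // Prints every file under root, descending into sub-directories via isDirectory().
    static void walk(FileSystem fs, Path root) throws IOException {
        for (FileStatus status : fs.listStatus(root)) {
            if (status.isDirectory()) {
                walk(fs, status.getPath());
            } else {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        walk(fs, new Path("/tmp/example")); // hypothetical starting directory
    }
}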