List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java
License:Apache License
/** * This method determines the physical location of the {@link SimpleHDFSConfigStore} root directory on HDFS. It does * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory * contains the folder {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. It the assumes this {@link Path} is the root * directory.//from w ww .j av a 2s. c o m * * <p> * If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does * not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If * the default root dir does not contain the {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME} then a * {@link ConfigStoreCreationException} is thrown. * </p> */ private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException { if (Strings.isNullOrEmpty(configKey.getAuthority())) { if (!hasDefaultStoreURI()) { throw new ConfigStoreCreationException(configKey, "No default store has been configured."); } return this.defaultStoreURI.get(); } Path path = new Path(configKey.getPath()); while (path != null) { try { // the abs URI may point to an unexist path for // 1. phantom node // 2. as URI did not specify the version if (fs.exists(path)) { for (FileStatus fileStatus : fs.listStatus(path)) { if (fileStatus.isDirectory() && fileStatus.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME)) { return fs.getUri().resolve(fileStatus.getPath().getParent().toUri()); } } } } catch (IOException e) { throw new ConfigStoreCreationException(configKey, e); } path = path.getParent(); } throw new ConfigStoreCreationException(configKey, "Cannot find the store root!"); }
From source file:gobblin.data.management.copy.CopyableFile.java
License:Apache License
/** * Compute the correct {@link OwnerAndPermission} obtained from replicating source owner and permissions and applying * the {@link PreserveAttributes} rules for fromPath and every ancestor up to but excluding toPath. * * @return A list of the computed {@link OwnerAndPermission}s starting from fromPath, up to but excluding toPath. * @throws IOException if toPath is not an ancestor of fromPath. *///from w ww .ja v a 2 s . c o m public static List<OwnerAndPermission> resolveReplicatedOwnerAndPermissionsRecursively(FileSystem sourceFs, Path fromPath, Path toPath, CopyConfiguration copyConfiguration) throws IOException { if (!PathUtils.isAncestor(toPath, fromPath)) { throw new IOException(String.format("toPath %s must be an ancestor of fromPath %s.", toPath, fromPath)); } List<OwnerAndPermission> ownerAndPermissions = Lists.newArrayList(); Path currentPath = fromPath; while (PathUtils.isAncestor(toPath, currentPath.getParent())) { ownerAndPermissions.add(resolveReplicatedOwnerAndPermission(sourceFs, currentPath, copyConfiguration)); currentPath = currentPath.getParent(); } return ownerAndPermissions; }
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
@Override public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException { CopyableFile copyableFile = fileAwareInputStream.getFile(); if (encryptionConfig != null) { copyableFile.setDestination(PathUtils.addExtension(copyableFile.getDestination(), "." + EncryptionConfigParser.getEncryptionType(encryptionConfig))); }//ww w. j a va2 s . co m Path stagingFile = getStagingFilePath(copyableFile); if (this.actualProcessedCopyableFile.isPresent()) { throw new IOException(this.getClass().getCanonicalName() + " can only process one file."); } this.actualProcessedCopyableFile = Optional.of(copyableFile); this.fs.mkdirs(stagingFile.getParent()); writeImpl(fileAwareInputStream.getInputStream(), stagingFile, copyableFile); this.filesWritten.incrementAndGet(); }
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
/** * Moves the file from task staging to task output. Each task has its own staging directory but all the tasks share * the same task output directory./*w w w . j a v a 2 s .c o m*/ * * {@inheritDoc} * * @see gobblin.writer.DataWriter#commit() */ @Override public void commit() throws IOException { if (!this.actualProcessedCopyableFile.isPresent()) { return; } CopyableFile copyableFile = this.actualProcessedCopyableFile.get(); Path stagingFilePath = getStagingFilePath(copyableFile); Path outputFilePath = getOutputFilePath(copyableFile, this.outputDir, copyableFile.getDatasetAndPartition(this.copyableDatasetMetadata)); log.info(String.format("Committing data from %s to %s", stagingFilePath, outputFilePath)); try { setFilePermissions(copyableFile); Iterator<OwnerAndPermission> ancestorOwnerAndPermissionIt = copyableFile .getAncestorsOwnerAndPermission() == null ? Iterators.<OwnerAndPermission>emptyIterator() : copyableFile.getAncestorsOwnerAndPermission().iterator(); ensureDirectoryExists(this.fs, outputFilePath.getParent(), ancestorOwnerAndPermissionIt); if (!this.fs.rename(stagingFilePath, outputFilePath)) { // target exists throw new IOException(String.format("Could not commit file %s.", outputFilePath)); } } catch (IOException ioe) { // persist file this.recoveryHelper.persistFile(this.state, copyableFile, stagingFilePath); throw ioe; } finally { try { this.fs.delete(this.stagingDir, true); } catch (IOException ioe) { log.warn("Failed to delete staging path at " + this.stagingDir); } } }
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
private void ensureDirectoryExists(FileSystem fs, Path path, Iterator<OwnerAndPermission> ownerAndPermissionIterator) throws IOException { if (fs.exists(path)) { return;//from w w w. j av a 2 s . com } if (ownerAndPermissionIterator.hasNext()) { OwnerAndPermission ownerAndPermission = ownerAndPermissionIterator.next(); if (path.getParent() != null) { ensureDirectoryExists(fs, path.getParent(), ownerAndPermissionIterator); } if (!fs.mkdirs(path)) { // fs.mkdirs returns false if path already existed. Do not overwrite permissions return; } if (ownerAndPermission.getFsPermission() != null) { log.debug("Applying permissions %s to path %s.", ownerAndPermission.getFsPermission(), path); fs.setPermission(path, addExecutePermissionToOwner(ownerAndPermission.getFsPermission())); } String group = ownerAndPermission.getGroup(); String owner = ownerAndPermission.getOwner(); if (group != null || owner != null) { log.debug("Applying owner %s and group %s to path %s.", owner, group, path); fs.setOwner(path, owner, group); } } else { fs.mkdirs(path); } }
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterTest.java
License:Apache License
@Test public void testCommit() throws IOException { String destinationExistingToken = "destination"; String destinationAdditionalTokens = "path"; String fileName = "file"; // Asemble destination paths Path destination = new Path(new Path(new Path("/", destinationExistingToken), destinationAdditionalTokens), fileName);//from w w w . ja v a2s. c o m Path destinationWithoutLeadingSeparator = new Path( new Path(destinationExistingToken, destinationAdditionalTokens), fileName); // Create temp directory File tmpFile = Files.createTempDir(); tmpFile.deleteOnExit(); Path tmpPath = new Path(tmpFile.getAbsolutePath()); // create origin file Path originFile = new Path(tmpPath, fileName); this.fs.createNewFile(originFile); // create stating dir Path stagingDir = new Path(tmpPath, "staging"); this.fs.mkdirs(stagingDir); // create output dir Path outputDir = new Path(tmpPath, "output"); this.fs.mkdirs(outputDir); // create copyable file FileStatus status = this.fs.getFileStatus(originFile); FsPermission readWrite = new FsPermission(FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.READ_WRITE); FsPermission dirReadWrite = new FsPermission(FsAction.ALL, FsAction.READ_WRITE, FsAction.READ_WRITE); OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), readWrite); List<OwnerAndPermission> ancestorOwnerAndPermissions = Lists.newArrayList(); ancestorOwnerAndPermissions.add(ownerAndPermission); ancestorOwnerAndPermissions.add(ownerAndPermission); ancestorOwnerAndPermissions.add(ownerAndPermission); ancestorOwnerAndPermissions.add(ownerAndPermission); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher"); CopyableFile cf = CopyableFile .fromOriginAndDestination(this.fs, status, destination, CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties) .publishDir(new Path("/target")).preserve(PreserveAttributes.fromMnemonicString("")) .build()) 
.destinationOwnerAndPermission(ownerAndPermission) .ancestorsOwnerAndPermission(ancestorOwnerAndPermissions).build(); // create work unit state WorkUnitState state = TestUtils.createTestWorkUnitState(); state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir.toUri().getPath()); state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir.toUri().getPath()); state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5)); CopyableDatasetMetadata metadata = new CopyableDatasetMetadata( new TestCopyableDataset(new Path("/source"))); CopySource.serializeCopyEntity(state, cf); CopySource.serializeCopyableDataset(state, metadata); // create writer FileAwareInputStreamDataWriter writer = new FileAwareInputStreamDataWriter(state, 1, 0); // create output of writer.write Path writtenFile = writer.getStagingFilePath(cf); this.fs.mkdirs(writtenFile.getParent()); this.fs.createNewFile(writtenFile); // create existing directories in writer output Path outputRoot = FileAwareInputStreamDataWriter.getPartitionOutputRoot(outputDir, cf.getDatasetAndPartition(metadata)); Path existingOutputPath = new Path(outputRoot, destinationExistingToken); this.fs.mkdirs(existingOutputPath); FileStatus fileStatus = this.fs.getFileStatus(existingOutputPath); FsPermission existingPathPermission = fileStatus.getPermission(); // check initial state of the relevant directories Assert.assertTrue(this.fs.exists(existingOutputPath)); Assert.assertEquals(this.fs.listStatus(existingOutputPath).length, 0); writer.actualProcessedCopyableFile = Optional.of(cf); // commit writer.commit(); // check state of relevant paths after commit Path expectedOutputPath = new Path(outputRoot, destinationWithoutLeadingSeparator); Assert.assertTrue(this.fs.exists(expectedOutputPath)); fileStatus = this.fs.getFileStatus(expectedOutputPath); Assert.assertEquals(fileStatus.getOwner(), ownerAndPermission.getOwner()); Assert.assertEquals(fileStatus.getGroup(), ownerAndPermission.getGroup()); 
Assert.assertEquals(fileStatus.getPermission(), readWrite); // parent should have permissions set correctly fileStatus = this.fs.getFileStatus(expectedOutputPath.getParent()); Assert.assertEquals(fileStatus.getPermission(), dirReadWrite); // previously existing paths should not have permissions changed fileStatus = this.fs.getFileStatus(existingOutputPath); Assert.assertEquals(fileStatus.getPermission(), existingPathPermission); Assert.assertFalse(this.fs.exists(writer.stagingDir)); }
From source file:gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java
License:Apache License
/** * Untars the passed in {@link FileAwareInputStream} to the task's staging directory. Uses the name of the root * {@link TarArchiveEntry} in the stream as the directory name for the untarred file. The method also commits the data * by moving the file from staging to output directory. * * @see gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(gobblin.data.management.copy.FileAwareInputStream) *//*w w w .ja v a 2 s .co m*/ @Override public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException { this.closer.register(inputStream); TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream); final ReadableByteChannel inputChannel = Channels.newChannel(tarIn); TarArchiveEntry tarEntry; // flush the first entry in the tar, which is just the root directory tarEntry = tarIn.getNextTarEntry(); String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR); log.info("Unarchiving at " + writeAt); try { while ((tarEntry = tarIn.getNextTarEntry()) != null) { // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName()); Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath); if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) { this.fs.mkdirs(tarEntryStagingPath); } else if (!tarEntry.isDirectory()) { FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true); final WritableByteChannel outputChannel = Channels.newChannel(out); try { StreamCopier copier = new StreamCopier(inputChannel, outputChannel); if (isInstrumentationEnabled()) { copier.withCopySpeedMeter(this.copySpeedMeter); } this.bytesWritten.addAndGet(copier.copy()); if (isInstrumentationEnabled()) { log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(), 
this.copySpeedMeter.getMeanRate()); } else { log.info("File {} copied.", copyableFile.getOrigin().getPath()); } } finally { out.close(); outputChannel.close(); } } } } finally { tarIn.close(); inputChannel.close(); inputStream.close(); } }
From source file:gobblin.data.management.retention.dataset.DatasetBase.java
License:Open Source License
/**
 * Cleans up old / deprecated versions of this dataset. See {@link gobblin.data.management.retention.DatasetCleaner}
 * javadoc for more information.
 *
 * <p>
 * The dataset versions are listed, sorted in reverse natural order and passed to the retention policy. Every path of
 * every deletable version is moved to trash (as owner if so configured); the parent of each successfully removed
 * path is remembered and, if deletion of empty directories is enabled, cleaned up afterwards. Paths that could not
 * be removed are logged as errors and do not abort the cleanup.
 * </p>
 *
 * @throws java.io.IOException if the version classes of the retention policy and the version finder are
 *         incompatible, or if a file system operation fails
 */
@Override
public void clean() throws IOException {

  RetentionPolicy<T> retentionPolicy = getRetentionPolicy();
  VersionFinder<? extends T> versionFinder = getVersionFinder();

  boolean compatible = retentionPolicy.versionClass().isAssignableFrom(versionFinder.versionClass());
  if (!compatible) {
    throw new IOException("Incompatible dataset version classes.");
  }

  this.log.info("Cleaning dataset " + this);

  List<T> sortedVersions = Lists.newArrayList(getVersionFinder().findDatasetVersions(this));
  Collections.sort(sortedVersions, Collections.reverseOrder());

  Collection<T> deletableVersions = getRetentionPolicy().listDeletableVersions(sortedVersions);

  Set<Path> possiblyEmptyDirectories = new HashSet<Path>();

  for (DatasetVersion version : deletableVersions) {
    this.log.info("Deleting dataset version " + version);

    Set<Path> targets = version.getPathsToDelete();
    this.log.info("Deleting paths: " + Arrays.toString(targets.toArray()));

    boolean anyFailure = false;
    for (Path target : targets) {
      boolean trashed;
      if (this.deleteAsOwner) {
        trashed = this.trash.moveToTrashAsOwner(target);
      } else {
        trashed = this.trash.moveToTrash(target);
      }

      if (!trashed) {
        this.log.error("Failed to delete path " + target + " in dataset version " + version);
        anyFailure = true;
        continue;
      }
      // The parent may have become empty; remember it for the clean-up pass below.
      possiblyEmptyDirectories.add(target.getParent());
    }

    if (anyFailure) {
      this.log.error("Failed to delete some paths in dataset version " + version);
    }
  }

  if (!this.deleteEmptyDirectories) {
    return;
  }
  for (Path candidate : possiblyEmptyDirectories) {
    deleteEmptyParentDirectories(datasetRoot(), candidate);
  }
}
From source file:gobblin.data.management.retention.dataset.DatasetBase.java
License:Open Source License
private void deleteEmptyParentDirectories(Path datasetRoot, Path parent) throws IOException { if (!parent.equals(datasetRoot) && this.fs.listStatus(parent).length == 0) { this.fs.delete(parent, false); deleteEmptyParentDirectories(datasetRoot, parent.getParent()); }// ww w . ja va 2s . c o m }
From source file:gobblin.data.management.retention.dataset.FsCleanableHelper.java
License:Apache License
/**
 * Deletes a single {@link FileSystemDatasetVersion}. The parent {@link Path} of every successfully deleted path is
 * added to <code>possiblyEmptyDirectories</code>. Callers need to call
 * {@link #cleanEmptyDirectories(Set, FileSystemDataset)} afterwards to delete empty parent directories, if any.
 *
 * <p>
 * Paths that do not exist are skipped. Paths that could not be moved to trash are logged as errors; they do not
 * abort the deletion of the remaining paths.
 * </p>
 *
 * @param versionToDelete the dataset version whose paths are to be deleted
 * @param possiblyEmptyDirectories receives the parents of the deleted paths
 * @throws IOException if a file system operation fails
 */
public void clean(final FileSystemDatasetVersion versionToDelete, final Set<Path> possiblyEmptyDirectories)
    throws IOException {

  log.info("Deleting dataset version " + versionToDelete);

  Set<Path> targets = versionToDelete.getPaths();
  log.info("Deleting paths: " + Arrays.toString(targets.toArray()));

  boolean anyFailure = false;

  for (Path target : targets) {
    if (!this.fs.exists(target)) {
      log.info(String.format("Path %s in dataset version %s does not exist", target, versionToDelete));
      continue;
    }

    boolean trashed;
    if (deleteAsOwner) {
      trashed = trash.moveToTrashAsOwner(target);
    } else {
      trashed = trash.moveToTrash(target);
    }

    if (!trashed) {
      log.error("Failed to delete path " + target + " in dataset version " + versionToDelete);
      anyFailure = true;
      continue;
    }
    possiblyEmptyDirectories.add(target.getParent());
  }

  if (anyFailure) {
    log.error("Failed to delete some paths in dataset version " + versionToDelete);
  }
}