Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Source Link

Document

Returns the parent of a path or null if at root.

Usage

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java

License:Apache License

/**
 * Determines the physical location of the {@link SimpleHDFSConfigStore} root directory for the given config key.
 *
 * <p>
 *   If the given configKey has no authority, the configured default store {@link URI} is returned; when no default
 *   store has been configured a {@link ConfigStoreCreationException} is thrown.
 * </p>
 *
 * <p>
 *   Otherwise the path of the configKey is back-traced: starting from the path itself and moving to each parent in
 *   turn, every existing directory is listed and checked for a child directory named
 *   {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. The first directory containing such a child is taken to be the
 *   store root and is resolved against the {@link FileSystem} URI.
 * </p>
 *
 * @param fs the {@link FileSystem} used to probe the candidate directories
 * @param configKey the {@link URI} supplied by the user
 * @return the {@link URI} of the store root
 * @throws ConfigStoreCreationException if no default store is configured for an authority-less key, if the
 *         {@link FileSystem} raises an {@link IOException}, or if no ancestor contains the store directory
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    boolean authorityMissing = Strings.isNullOrEmpty(configKey.getAuthority());
    if (authorityMissing) {
        if (hasDefaultStoreURI()) {
            return this.defaultStoreURI.get();
        }
        throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
    }

    // Walk from the requested path up towards the root; getParent() yields null once the root has been passed.
    for (Path candidate = new Path(configKey.getPath()); candidate != null; candidate = candidate.getParent()) {
        try {
            // The absolute URI may point to a path that does not exist, either because
            // 1. it is a phantom node, or
            // 2. the URI did not specify the version.
            if (!fs.exists(candidate)) {
                continue;
            }

            for (FileStatus child : fs.listStatus(candidate)) {
                boolean isStoreDirectory = child.isDirectory()
                        && child.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME);
                if (isStoreDirectory) {
                    return fs.getUri().resolve(child.getPath().getParent().toUri());
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }
    }

    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.data.management.copy.CopyableFile.java

License:Apache License

/**
 * Computes the {@link OwnerAndPermission} for fromPath and for each of its ancestors below toPath, by replicating
 * the source owner and permissions and applying the {@link PreserveAttributes} rules (delegated to
 * {@link #resolveReplicatedOwnerAndPermission}).
 *
 * <p>
 *   The walk starts at fromPath and moves to the parent for as long as
 *   {@link PathUtils#isAncestor} reports toPath as an ancestor of the current path's parent.
 * </p>
 *
 * @param sourceFs the source {@link FileSystem}
 * @param fromPath the path at which the walk starts
 * @param toPath the ancestor at which the walk stops
 * @param copyConfiguration the {@link CopyConfiguration} passed through to the per-path resolution
 * @return A list of the computed {@link OwnerAndPermission}s starting from fromPath, up to but excluding toPath.
 * @throws IOException if toPath is not an ancestor of fromPath.
 */
public static List<OwnerAndPermission> resolveReplicatedOwnerAndPermissionsRecursively(FileSystem sourceFs,
        Path fromPath, Path toPath, CopyConfiguration copyConfiguration) throws IOException {

    boolean toPathIsAncestor = PathUtils.isAncestor(toPath, fromPath);
    if (!toPathIsAncestor) {
        throw new IOException(String.format("toPath %s must be an ancestor of fromPath %s.", toPath, fromPath));
    }

    List<OwnerAndPermission> resolved = Lists.newArrayList();

    // Climb one level per iteration; stop as soon as the next parent is no longer covered by toPath.
    for (Path cursor = fromPath; PathUtils.isAncestor(toPath, cursor.getParent()); cursor = cursor.getParent()) {
        resolved.add(resolveReplicatedOwnerAndPermission(sourceFs, cursor, copyConfiguration));
    }

    return resolved;
}

From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java

License:Apache License

/**
 * Writes the single {@link CopyableFile} carried by the given {@link FileAwareInputStream} to its staging location.
 *
 * <p>
 *   When an encryption config is present, the destination of the file is first extended with an extension derived
 *   from the encryption type. The parent directory of the staging file is created before the stream is written.
 * </p>
 *
 * @param fileAwareInputStream the stream and file metadata to write
 * @throws IOException if this writer has already processed a file, or if writing fails
 */
@Override
public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException {
    final CopyableFile file = fileAwareInputStream.getFile();

    final boolean encrypting = encryptionConfig != null;
    if (encrypting) {
        file.setDestination(PathUtils.addExtension(file.getDestination(),
                "." + EncryptionConfigParser.getEncryptionType(encryptionConfig)));
    }

    final Path stagingTarget = getStagingFilePath(file);

    // A writer instance handles exactly one file; a second call is rejected.
    if (this.actualProcessedCopyableFile.isPresent()) {
        String writerClassName = this.getClass().getCanonicalName();
        throw new IOException(writerClassName + " can only process one file.");
    }
    this.actualProcessedCopyableFile = Optional.of(file);

    this.fs.mkdirs(stagingTarget.getParent());
    writeImpl(fileAwareInputStream.getInputStream(), stagingTarget, file);
    this.filesWritten.incrementAndGet();
}

From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java

License:Apache License

/**
 * Moves the file from task staging to task output. Each task has its own staging directory but all the tasks share
 * the same task output directory./*w w  w  . j a v a 2  s  .c o  m*/
 *
 * {@inheritDoc}
 *
 * @see gobblin.writer.DataWriter#commit()
 */
@Override
public void commit() throws IOException {

    if (!this.actualProcessedCopyableFile.isPresent()) {
        return;
    }

    CopyableFile copyableFile = this.actualProcessedCopyableFile.get();
    Path stagingFilePath = getStagingFilePath(copyableFile);
    Path outputFilePath = getOutputFilePath(copyableFile, this.outputDir,
            copyableFile.getDatasetAndPartition(this.copyableDatasetMetadata));

    log.info(String.format("Committing data from %s to %s", stagingFilePath, outputFilePath));
    try {
        setFilePermissions(copyableFile);

        Iterator<OwnerAndPermission> ancestorOwnerAndPermissionIt = copyableFile
                .getAncestorsOwnerAndPermission() == null ? Iterators.<OwnerAndPermission>emptyIterator()
                        : copyableFile.getAncestorsOwnerAndPermission().iterator();

        ensureDirectoryExists(this.fs, outputFilePath.getParent(), ancestorOwnerAndPermissionIt);

        if (!this.fs.rename(stagingFilePath, outputFilePath)) {
            // target exists
            throw new IOException(String.format("Could not commit file %s.", outputFilePath));
        }
    } catch (IOException ioe) {
        // persist file
        this.recoveryHelper.persistFile(this.state, copyableFile, stagingFilePath);
        throw ioe;
    } finally {
        try {
            this.fs.delete(this.stagingDir, true);
        } catch (IOException ioe) {
            log.warn("Failed to delete staging path at " + this.stagingDir);
        }
    }
}

From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java

License:Apache License

/**
 * Creates the given directory, and recursively its missing ancestors, if it does not exist yet.
 *
 * <p>
 *   Each directory that has to be created consumes one element of the iterator (the element for a directory is taken
 *   before its parent is processed) and has that element's permissions, owner and group applied. Directories that
 *   already exist are left untouched. Once the iterator is exhausted, the remaining path is created with a plain
 *   {@code mkdirs} call.
 * </p>
 *
 * @param fs the {@link FileSystem} on which the directory is created
 * @param path the directory that must exist when this method returns
 * @param ownerAndPermissionIterator owner/permission entries, ordered from {@code path} upwards through its ancestors
 * @throws IOException if a file system operation fails
 */
private void ensureDirectoryExists(FileSystem fs, Path path,
        Iterator<OwnerAndPermission> ownerAndPermissionIterator) throws IOException {

    if (fs.exists(path)) {
        return;
    }

    if (ownerAndPermissionIterator.hasNext()) {
        OwnerAndPermission ownerAndPermission = ownerAndPermissionIterator.next();

        if (path.getParent() != null) {
            ensureDirectoryExists(fs, path.getParent(), ownerAndPermissionIterator);
        }

        if (!fs.mkdirs(path)) {
            // fs.mkdirs returns false if path already existed. Do not overwrite permissions
            return;
        }

        if (ownerAndPermission.getFsPermission() != null) {
            // Logger placeholders are "{}"; printf-style "%s" would be printed literally.
            log.debug("Applying permissions {} to path {}.", ownerAndPermission.getFsPermission(), path);
            fs.setPermission(path, addExecutePermissionToOwner(ownerAndPermission.getFsPermission()));
        }

        String group = ownerAndPermission.getGroup();
        String owner = ownerAndPermission.getOwner();
        if (group != null || owner != null) {
            log.debug("Applying owner {} and group {} to path {}.", owner, group, path);
            fs.setOwner(path, owner, group);
        }
    } else {
        fs.mkdirs(path);
    }
}

From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterTest.java

License:Apache License

/**
 * Verifies that {@code commit()} moves the staged file to the expected output location, applies the configured
 * owner and permissions to the file and to newly created parent directories, leaves the permissions of directories
 * that already existed unchanged, and removes the writer's staging directory.
 */
@Test
public void testCommit() throws IOException {

    String destinationExistingToken = "destination";
    String destinationAdditionalTokens = "path";
    String fileName = "file";

    // Assemble destination paths: an absolute one for the copyable file, and the same path without the
    // leading separator so it can later be resolved relative to the writer output root
    Path destination = new Path(new Path(new Path("/", destinationExistingToken), destinationAdditionalTokens),
            fileName);
    Path destinationWithoutLeadingSeparator = new Path(
            new Path(destinationExistingToken, destinationAdditionalTokens), fileName);

    // Create temp directory
    File tmpFile = Files.createTempDir();
    tmpFile.deleteOnExit();
    Path tmpPath = new Path(tmpFile.getAbsolutePath());

    // create origin file
    Path originFile = new Path(tmpPath, fileName);
    this.fs.createNewFile(originFile);

    // create staging dir
    Path stagingDir = new Path(tmpPath, "staging");
    this.fs.mkdirs(stagingDir);

    // create output dir
    Path outputDir = new Path(tmpPath, "output");
    this.fs.mkdirs(outputDir);

    // create copyable file
    FileStatus status = this.fs.getFileStatus(originFile);
    FsPermission readWrite = new FsPermission(FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.READ_WRITE);
    // expected permission of created directories: same as readWrite except that the owner gets ALL
    FsPermission dirReadWrite = new FsPermission(FsAction.ALL, FsAction.READ_WRITE, FsAction.READ_WRITE);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
            readWrite);
    // the same owner/permission entry is registered four times as ancestor entries
    List<OwnerAndPermission> ancestorOwnerAndPermissions = Lists.newArrayList();
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);

    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");

    CopyableFile cf = CopyableFile
            .fromOriginAndDestination(this.fs, status, destination,
                    CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties)
                            .publishDir(new Path("/target")).preserve(PreserveAttributes.fromMnemonicString(""))
                            .build())
            .destinationOwnerAndPermission(ownerAndPermission)
            .ancestorsOwnerAndPermission(ancestorOwnerAndPermissions).build();

    // create work unit state
    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir.toUri().getPath());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir.toUri().getPath());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(
            new TestCopyableDataset(new Path("/source")));
    CopySource.serializeCopyEntity(state, cf);
    CopySource.serializeCopyableDataset(state, metadata);

    // create writer
    FileAwareInputStreamDataWriter writer = new FileAwareInputStreamDataWriter(state, 1, 0);

    // create output of writer.write (simulated by creating the staged file directly)
    Path writtenFile = writer.getStagingFilePath(cf);
    this.fs.mkdirs(writtenFile.getParent());
    this.fs.createNewFile(writtenFile);

    // create existing directories in writer output
    Path outputRoot = FileAwareInputStreamDataWriter.getPartitionOutputRoot(outputDir,
            cf.getDatasetAndPartition(metadata));
    Path existingOutputPath = new Path(outputRoot, destinationExistingToken);
    this.fs.mkdirs(existingOutputPath);
    FileStatus fileStatus = this.fs.getFileStatus(existingOutputPath);
    FsPermission existingPathPermission = fileStatus.getPermission();

    // check initial state of the relevant directories
    Assert.assertTrue(this.fs.exists(existingOutputPath));
    Assert.assertEquals(this.fs.listStatus(existingOutputPath).length, 0);

    // mark the copyable file as processed so that commit() acts on it
    writer.actualProcessedCopyableFile = Optional.of(cf);

    // commit
    writer.commit();

    // check state of relevant paths after commit
    Path expectedOutputPath = new Path(outputRoot, destinationWithoutLeadingSeparator);
    Assert.assertTrue(this.fs.exists(expectedOutputPath));
    fileStatus = this.fs.getFileStatus(expectedOutputPath);
    Assert.assertEquals(fileStatus.getOwner(), ownerAndPermission.getOwner());
    Assert.assertEquals(fileStatus.getGroup(), ownerAndPermission.getGroup());
    Assert.assertEquals(fileStatus.getPermission(), readWrite);
    // parent should have permissions set correctly
    fileStatus = this.fs.getFileStatus(expectedOutputPath.getParent());
    Assert.assertEquals(fileStatus.getPermission(), dirReadWrite);
    // previously existing paths should not have permissions changed
    fileStatus = this.fs.getFileStatus(existingOutputPath);
    Assert.assertEquals(fileStatus.getPermission(), existingPathPermission);

    // the staging directory must be gone after the commit
    Assert.assertFalse(this.fs.exists(writer.stagingDir));
}

From source file:gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java

License:Apache License

/**
 * Untars the passed in {@link InputStream} to the given staging location. The first {@link TarArchiveEntry} in the
 * stream is treated as the root directory of the archive; its name is replaced by the name of {@code writeAt} in the
 * path of every subsequent entry, so that the archive content ends up under {@code writeAt}.
 *
 * <p>
 *   NOTE(review): entry names are not checked for path traversal (e.g. {@code ..} segments) before they are resolved
 *   against the parent of {@code writeAt} - confirm that archives come from a trusted source.
 * </p>
 *
 * @param inputStream the tar stream to read; it is registered with the closer and closed when this method returns
 * @param writeAt the staging path under which the archive content is written
 * @param copyableFile the file being copied, used for log messages
 * @throws IOException if the archive contains no entries or if reading or writing fails
 * @see gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
    this.closer.register(inputStream);

    TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
    final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);
    TarArchiveEntry tarEntry;

    try {
        // flush the first entry in the tar, which is just the root directory
        tarEntry = tarIn.getNextTarEntry();
        if (tarEntry == null) {
            // An empty archive has no root entry; fail with a descriptive error instead of a NullPointerException.
            throw new IOException("Tar archive for " + copyableFile.getOrigin().getPath()
                    + " contains no entries; cannot unarchive at " + writeAt);
        }
        String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

        log.info("Unarchiving at " + writeAt);

        while ((tarEntry = tarIn.getNextTarEntry()) != null) {

            // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file.
            // Only the first occurrence (the leading root directory) is rewritten, so that files or
            // sub-directories whose names happen to contain the root name keep their original names.
            String newTarEntryPath = StringUtils.replaceOnce(tarEntry.getName(), tarEntryRootName,
                    writeAt.getName());
            Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);

            if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
                this.fs.mkdirs(tarEntryStagingPath);
            } else if (!tarEntry.isDirectory()) {
                FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
                final WritableByteChannel outputChannel = Channels.newChannel(out);
                try {
                    StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
                    if (isInstrumentationEnabled()) {
                        copier.withCopySpeedMeter(this.copySpeedMeter);
                    }
                    this.bytesWritten.addAndGet(copier.copy());
                    if (isInstrumentationEnabled()) {
                        log.info("File {}: copied {} bytes, average rate: {} B/s",
                                copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(),
                                this.copySpeedMeter.getMeanRate());
                    } else {
                        log.info("File {} copied.", copyableFile.getOrigin().getPath());
                    }
                } finally {
                    out.close();
                    outputChannel.close();
                }
            }
        }
    } finally {
        tarIn.close();
        inputChannel.close();
        inputStream.close();
    }
}

From source file:gobblin.data.management.retention.dataset.DatasetBase.java

License:Open Source License

/**
 * Perform the cleanup of old / deprecated dataset versions. See {@link gobblin.data.management.retention.DatasetCleaner}
 * javadoc for more information.
 *
 * <p>
 *   All versions found by the version finder are sorted in reverse natural order and handed to the retention policy,
 *   which selects the deletable ones. Every path of a deletable version is moved to trash; failures are logged and
 *   do not abort the cleanup. If enabled, parent directories left empty by the deletion are removed afterwards.
 * </p>
 *
 * @throws java.io.IOException if the version classes of the retention policy and the version finder are
 *         incompatible, or if a file system operation fails
 */
@Override
public void clean() throws IOException {

    RetentionPolicy<T> policy = getRetentionPolicy();
    VersionFinder<? extends T> finder = getVersionFinder();

    boolean compatibleVersionClasses = policy.versionClass().isAssignableFrom(finder.versionClass());
    if (!compatibleVersionClasses) {
        throw new IOException("Incompatible dataset version classes.");
    }

    this.log.info("Cleaning dataset " + this);

    List<T> sortedVersions = Lists.newArrayList(getVersionFinder().findDatasetVersions(this));
    Collections.sort(sortedVersions, Collections.reverseOrder());

    Collection<T> versionsToDelete = getRetentionPolicy().listDeletableVersions(sortedVersions);

    // Parents of successfully deleted paths; they may have become empty.
    Set<Path> parentsToInspect = new HashSet<Path>();

    for (DatasetVersion version : versionsToDelete) {
        this.log.info("Deleting dataset version " + version);

        Set<Path> versionPaths = version.getPathsToDelete();
        this.log.info("Deleting paths: " + Arrays.toString(versionPaths.toArray()));

        boolean anyPathFailed = false;

        for (Path path : versionPaths) {
            boolean movedToTrash;
            if (this.deleteAsOwner) {
                movedToTrash = this.trash.moveToTrashAsOwner(path);
            } else {
                movedToTrash = this.trash.moveToTrash(path);
            }

            if (!movedToTrash) {
                this.log.error("Failed to delete path " + path + " in dataset version " + version);
                anyPathFailed = true;
                continue;
            }
            parentsToInspect.add(path.getParent());
        }

        if (anyPathFailed) {
            this.log.error("Failed to delete some paths in dataset version " + version);
        }
    }

    if (!this.deleteEmptyDirectories) {
        return;
    }
    for (Path parentDirectory : parentsToInspect) {
        deleteEmptyParentDirectories(datasetRoot(), parentDirectory);
    }
}

From source file:gobblin.data.management.retention.dataset.DatasetBase.java

License:Open Source License

/**
 * Deletes the given directory if it is empty, then continues with its parent, and so on, until a non-empty
 * directory, the dataset root, or the file system root is reached. The dataset root itself is never deleted.
 *
 * @param datasetRoot the directory at which the upward walk stops
 * @param parent the first directory to inspect; may be {@code null}, in which case nothing happens
 * @throws IOException if listing or deleting a directory fails
 */
private void deleteEmptyParentDirectories(Path datasetRoot, Path parent) throws IOException {
    Path current = parent;
    // Path#getParent() returns null at the file system root, so the null check ends the walk there
    // instead of failing with a NullPointerException.
    while (current != null && !current.equals(datasetRoot) && this.fs.listStatus(current).length == 0) {
        this.fs.delete(current, false);
        current = current.getParent();
    }
}

From source file:gobblin.data.management.retention.dataset.FsCleanableHelper.java

License:Apache License

/**
 * Deletes a single {@link FileSystemDatasetVersion}. The parent {@link Path} of every successfully deleted path is
 * added to <code>possiblyEmptyDirectories</code>. The caller needs to call
 * {@link #cleanEmptyDirectories(Set, FileSystemDataset)} afterwards to delete empty parent directories, if any.
 *
 * <p>
 *   Paths that do not exist are skipped. A path that cannot be moved to trash is logged as an error and does not
 *   stop the processing of the remaining paths.
 * </p>
 *
 * @param versionToDelete the dataset version whose paths are deleted
 * @param possiblyEmptyDirectories output set collecting the parents of the deleted paths
 * @throws IOException if a file system operation fails
 */
public void clean(final FileSystemDatasetVersion versionToDelete, final Set<Path> possiblyEmptyDirectories)
        throws IOException {
    log.info("Deleting dataset version " + versionToDelete);

    Set<Path> versionPaths = versionToDelete.getPaths();
    log.info("Deleting paths: " + Arrays.toString(versionPaths.toArray()));

    boolean anyPathFailed = false;

    for (Path path : versionPaths) {

        boolean pathExists = this.fs.exists(path);
        if (!pathExists) {
            log.info(String.format("Path %s in dataset version %s does not exist", path, versionToDelete));
            continue;
        }

        boolean movedToTrash;
        if (deleteAsOwner) {
            movedToTrash = trash.moveToTrashAsOwner(path);
        } else {
            movedToTrash = trash.moveToTrash(path);
        }

        if (!movedToTrash) {
            log.error("Failed to delete path " + path + " in dataset version " + versionToDelete);
            anyPathFailed = true;
            continue;
        }
        possiblyEmptyDirectories.add(path.getParent());
    }

    if (anyPathFailed) {
        log.error("Failed to delete some paths in dataset version " + versionToDelete);
    }
}