Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

This page collects example usages of org.apache.hadoop.fs.Path.getParent().

Prototype

public Path getParent() 

Source Link

Document

Returns the parent of this path, or null if this path is the root.

Usage

From source file:gobblin.data.management.retention.FsCleanableHelperTest.java

License:Apache License

/**
 * Cleans three version directories of a dataset (with {@code SKIP_TRASH_KEY} set to true) and
 * checks that they are removed, that the parent directory left empty is removed as well, and
 * that a version not passed to the cleaner (and its parent) is still present.
 */
@Test
public void testDeleteEmptyDirs() throws Exception {
    Properties cleanerProps = new Properties();
    cleanerProps.setProperty(FsCleanableHelper.SKIP_TRASH_KEY, Boolean.toString(true));
    FsCleanableHelper cleaner = new FsCleanableHelper(this.fs, cleanerProps, ConfigFactory.empty(), log);

    Path root = new Path(testTempPath, "dataset1");
    FileSystemDataset dataset = mock(FileSystemDataset.class);
    when(dataset.datasetRoot()).thenReturn(root);

    // Versions that are handed to the cleaner
    Path expiredA = new Path(root, "2016/01/01/13");
    Path expiredB = new Path(root, "2016/01/01/14");
    Path expiredC = new Path(root, "2016/01/02/15");
    List<Path> expired = ImmutableList.<Path>of(expiredA, expiredB, expiredC);

    // Version that is never handed to the cleaner
    Path retained = new Path(root, "2016/01/02/16");

    for (Path dir : expired) {
        this.fs.mkdirs(dir);
    }
    this.fs.mkdirs(retained);

    // Every directory must exist before cleaning starts
    for (Path dir : expired) {
        Assert.assertTrue(this.fs.exists(dir));
    }
    Assert.assertTrue(this.fs.exists(retained));

    ImmutableList.Builder<FileSystemDatasetVersion> versions = ImmutableList.builder();
    for (Path dir : expired) {
        versions.add(new MockFileSystemDatasetVersion(dir));
    }
    List<FileSystemDatasetVersion> deletableVersions = versions.build();

    cleaner.clean(deletableVersions, dataset);

    // The cleaned versions are gone
    for (Path dir : expired) {
        Assert.assertFalse(this.fs.exists(dir));
    }

    // The version that was not cleaned is still there
    Assert.assertTrue(this.fs.exists(retained));

    // "2016/01/01" lost all of its children, so it is removed too
    Assert.assertFalse(this.fs.exists(expiredA.getParent()));

    // "2016/01/02" still holds the retained version, so it stays
    Assert.assertTrue(this.fs.exists(retained.getParent()));
}

From source file:gobblin.data.management.trash.Trash.java

License:Apache License

/**
 * Makes sure {@code trashLocation} is a directory containing the trash identifier file.
 *
 * <p>If the location does not exist, its parent, the location itself and the identifier file are
 * created. If it exists without an identifier file, the file is added only when the directory
 * is empty.
 *
 * @param fs file system on which the trash location lives.
 * @param trashLocation path of the trash directory.
 * @throws IOException if the location exists but is not a directory, if it is a non-empty
 *     directory without the identifier file, or if any required directory or file cannot
 *     be created.
 */
protected void ensureTrashLocationExists(FileSystem fs, Path trashLocation) throws IOException {
    if (!fs.exists(trashLocation)) {
        // Nothing there yet: build parent, trash directory and identifier file in that order.
        boolean created = fs.mkdirs(trashLocation.getParent(), ALL_PERM) && fs.mkdirs(trashLocation, PERM)
                && fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE));
        if (!created) {
            throw new IOException("Failed to create trash directory at " + trashLocation.toString());
        }
        return;
    }

    if (!fs.isDirectory(trashLocation)) {
        throw new IOException(String.format("Trash location %s is not a directory.", trashLocation));
    }

    if (fs.exists(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) {
        return;
    }

    // No identifier file: the directory might have been created by a user, so it is only
    // adopted as a trash directory when it is empty.
    if (fs.listStatus(trashLocation).length > 0) {
        throw new IOException(String.format(
                "Trash directory %s exists, but it does not look like a trash directory. "
                        + "File: %s missing and directory is not empty.",
                trashLocation, TRASH_IDENTIFIER_FILE));
    }
    if (!fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) {
        throw new IOException(String.format("Failed to create file %s in existing trash directory %s.",
                TRASH_IDENTIFIER_FILE, trashLocation));
    }
}

From source file:gobblin.data.management.trash.Trash.java

License:Apache License

/**
 * Move a path to trash. The absolute path of the input path will be replicated under the trash directory.
 * If an entry already exists at the target location, the target name gets a
 * {@code _<current time in millis>} suffix.
 *
 * @param path {@link org.apache.hadoop.fs.FileSystem} path to move to trash; a relative path is
 *     resolved against the file system's working directory.
 * @return the result of the rename into the trash directory.
 * @throws IOException if a file system operation fails.
 */
@Override
public boolean moveToTrash(Path path) throws IOException {
    Path source = path;
    if (!path.isAbsolute()) {
        source = new Path(this.fs.getWorkingDirectory(), path);
    }

    Path destination = PathUtils.mergePaths(this.trashLocation, source);
    Path destinationParent = destination.getParent();

    if (this.fs.exists(destinationParent)) {
        // Only when the parent exists can the target itself already be taken.
        if (this.fs.exists(destination)) {
            destination = destination.suffix("_" + System.currentTimeMillis());
        }
    } else {
        this.fs.mkdirs(destinationParent);
    }

    return this.fs.rename(source, destination);
}

From source file:gobblin.data.management.trash.TrashTest.java

License:Apache License

/**
 * With a file system mock that reports every path as existing, moving a path to trash must not
 * create directories and must rename the path to a timestamp-suffixed name under the trash
 * location.
 */
@Test
public void testMoveToTrashExistingFile() throws IOException {

    final String baseName = "delete";
    final Path source = new Path("/path/to", baseName);
    final Pattern suffixedName = Pattern.compile("^" + baseName + "_[0-9]+$");

    TrashTestBase harness = new TrashTestBase(new Properties());

    // Every rename requested on the mock is recorded here as (from, to).
    final List<Pair<Path, Path>> renames = Lists.newArrayList();

    when(harness.fs.exists(any(Path.class))).thenReturn(true);
    when(harness.fs.rename(any(Path.class), any(Path.class))).thenAnswer(new Answer<Boolean>() {
        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            Object[] renameArgs = invocation.getArguments();
            Path from = (Path) renameArgs[0];
            Path to = (Path) renameArgs[1];
            renames.add(new Pair<Path, Path>(from, to));
            return true;
        }
    });

    Assert.assertTrue(harness.trash.moveToTrash(source));

    verify(harness.fs, times(0)).mkdirs(any(Path.class));

    Assert.assertEquals(renames.size(), 1);

    Pair<Path, Path> rename = renames.get(0);
    Path target = rename.second();

    Assert.assertTrue(rename.first().equals(source));
    Assert.assertTrue(target.getParent().toString().endsWith(source.getParent().toString()));
    Assert.assertTrue(suffixedName.matcher(target.getName()).matches());
    // Three levels up from the target ("<name>_<ts>" -> "to" -> "path" -> trash location).
    Assert.assertTrue(target.getParent().getParent().getParent().equals(harness.trash.getTrashLocation()));

}

From source file:gobblin.data.management.util.PathUtilsTest.java

License:Apache License

/**
 * Runs the same set of {@code PathUtils.isAncestor} expectations against an absolute and a
 * relative ancestor path: a descendant and the path itself are accepted; unrelated paths and
 * the ancestor's own parent are rejected.
 */
@Test
public void testIsAncestor() throws Exception {
    Path[] ancestors = { new Path("/some/path"), new Path("relative/ancestor") };

    for (Path candidate : ancestors) {
        Assert.assertTrue(PathUtils.isAncestor(candidate, new Path(candidate, "more/elements")));
        Assert.assertTrue(PathUtils.isAncestor(candidate, candidate));
        Assert.assertFalse(PathUtils.isAncestor(candidate, new Path("/unrelated/path")));
        Assert.assertFalse(PathUtils.isAncestor(candidate, new Path("relative/path")));
        Assert.assertFalse(PathUtils.isAncestor(candidate, candidate.getParent()));
    }

}

From source file:gobblin.metastore.FsStateStore.java

License:Apache License

/**
 * Creates {@code alias} in store {@code storeName} as a copy of the table {@code original}.
 *
 * @param storeName name of the store, resolved under the store root directory.
 * @param original name of the existing table.
 * @param alias name of the table to create.
 * @throws IOException if the original table does not exist or the copy fails.
 */
@Override
public void createAlias(String storeName, String original, String alias) throws IOException {
    Path originalTablePath = new Path(new Path(this.storeRootDir, storeName), original);
    boolean originalPresent = this.fs.exists(originalTablePath);
    if (!originalPresent) {
        String message = String.format("State file %s does not exist for table %s", originalTablePath,
                original);
        throw new IOException(message);
    }

    Path aliasTablePath = new Path(new Path(this.storeRootDir, storeName), alias);
    // Temporary location: <alias parent>/<TMP_FILE_PREFIX>/<alias name>
    Path tmpRelative = new Path(TMP_FILE_PREFIX, aliasTablePath.getName());
    Path tmpAliasTablePath = new Path(aliasTablePath.getParent(), tmpRelative);

    // The alias is a physical copy rather than a link, as a work-around because
    // Hadoop version 1.2.1 has no support for symlink yet.
    // NOTE(review): the boolean argument is presumably an overwrite flag - confirm against HadoopUtils.
    HadoopUtils.copyFile(this.fs, originalTablePath, this.fs, aliasTablePath, tmpAliasTablePath, true,
            this.conf);
}

From source file:gobblin.publisher.BaseDataPublisher.java

License:Apache License

/**
 * Publishes the writer output of one branch of a work unit to the publisher output directory.
 *
 * <p>When {@code publishSingleTaskData} is true, the final directory is created and only this
 * task's files are added to it. Otherwise the whole writer output directory is moved, unless it
 * is already listed in {@code writerOutputPathsMoved}; an existing final directory is either
 * appended to or deleted and replaced, depending on the branch's
 * {@code DATA_PUBLISHER_REPLACE_FINAL_DIR} property.
 *
 * @param state work unit state whose output is published.
 * @param branchId branch whose file systems, permissions and paths are used.
 * @param publishSingleTaskData whether to publish only this task's files.
 * @param writerOutputPathsMoved writer output directories already handled; updated by this method
 *     in the non-single-task case.
 * @throws IOException if a file system operation fails.
 */
protected void publishData(WorkUnitState state, int branchId, boolean publishSingleTaskData,
        Set<Path> writerOutputPathsMoved) throws IOException {
    // Runner used to move files in parallel
    ParallelRunner parallelRunner = this.getParallelRunner(this.writerFileSystemByBranches.get(branchId));

    // Where the work unit wrote its output
    Path writerOutputDir = WriterUtils.getWriterOutputDir(state, this.numBranches, branchId);

    boolean writerOutputPresent = this.writerFileSystemByBranches.get(branchId).exists(writerOutputDir);
    if (!writerOutputPresent) {
        LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId()));
        return;
    }

    // Final output location for this job: a combination of DATA_PUBLISHER_FINAL_DIR and
    // WRITER_FILE_PATH.
    Path publisherOutputDir = getPublisherOutputDir(state, branchId);

    if (publishSingleTaskData) {
        // Single-task mode: make sure the final directory exists, then add this task's files.
        // writerOutputPathsMoved is ignored and the final directory is never replaced here.
        WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
                publisherOutputDir, this.permissions.get(branchId));
        addSingleTaskWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId,
                parallelRunner);
        return;
    }

    if (writerOutputPathsMoved.contains(writerOutputDir)) {
        // Another task of the same extract has already moved this writer output path.
        return;
    }

    if (!this.publisherFileSystemByBranches.get(branchId).exists(publisherOutputDir)) {
        // Final directory is absent: only its parent has to be created before the move.
        WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
                publisherOutputDir.getParent(), this.permissions.get(branchId));
    } else {
        // Final directory is present: check whether the job is configured to replace it.
        boolean overwriteExisting = this.getState()
                .getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(
                        ConfigurationKeys.DATA_PUBLISHER_REPLACE_FINAL_DIR, this.numBranches, branchId));

        if (!overwriteExisting) {
            // Not replacing: merge the new data into the existing directory and stop.
            addWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId,
                    parallelRunner);
            writerOutputPathsMoved.add(writerOutputDir);
            return;
        }

        // Replacing: remove the old final directory before the move.
        LOG.info("Deleting publisher output dir " + publisherOutputDir);
        this.publisherFileSystemByBranches.get(branchId).delete(publisherOutputDir, true);
    }

    movePath(parallelRunner, state, writerOutputDir, publisherOutputDir, branchId);
    writerOutputPathsMoved.add(writerOutputDir);
}

From source file:gobblin.publisher.BaseDataPublisher.java

License:Apache License

/**
 * Moves each output file recorded for a single task into the publisher output directory,
 * keeping the file's path relative to the writer output directory.
 *
 * @param writerOutputDir directory the task wrote into.
 * @param publisherOutputDir directory the files are published to.
 * @param workUnitState state holding the branch's {@code WRITER_FINAL_OUTPUT_FILE_PATHS} property.
 * @param branchId branch whose file systems and permissions are used.
 * @param parallelRunner runner passed on to {@code movePath}.
 * @throws IOException if a file system operation fails.
 */
protected void addSingleTaskWriterOutputToExistingDir(Path writerOutputDir, Path publisherOutputDir,
        WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {
    String outputFilePropName = ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.WRITER_FINAL_OUTPUT_FILE_PATHS, this.numBranches, branchId);

    boolean hasRecordedFiles = workUnitState.contains(outputFilePropName);
    if (!hasRecordedFiles) {
        LOG.warn("Missing property " + outputFilePropName + ". This task may have pulled no data.");
        return;
    }

    String writerDirText = writerOutputDir.toString();

    for (String taskOutputFile : workUnitState.getPropAsSet(outputFilePropName)) {
        Path taskOutputPath = new Path(taskOutputFile);
        if (this.writerFileSystemByBranches.get(branchId).exists(taskOutputPath)) {
            // Portion of the file path after "<writerOutputDir>/".
            // NOTE(review): assumes taskOutputFile contains writerOutputDir; the indexOf result is not checked.
            int suffixStart = taskOutputFile.indexOf(writerDirText) + writerDirText.length() + 1;
            String pathSuffix = taskOutputFile.substring(suffixStart);

            Path publisherOutputPath = new Path(publisherOutputDir, pathSuffix);
            WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
                    publisherOutputPath.getParent(), this.permissions.get(branchId));

            movePath(parallelRunner, workUnitState, taskOutputPath, publisherOutputPath, branchId);
        } else {
            LOG.warn("Task output file " + taskOutputFile + " doesn't exist.");
        }
    }
}

From source file:gobblin.publisher.BaseDataPublisher.java

License:Apache License

/**
 * Returns the publisher output directory to record for a move from {@code src} to {@code dst}:
 * {@code dst} itself when {@code src} is a directory, otherwise the parent of {@code dst}.
 *
 * @param src source path, looked up on the writer file system of the branch.
 * @param dst destination path of the move.
 * @param branchId branch whose writer file system is used.
 * @return a single-element collection holding the directory to record.
 * @throws IOException if the status of {@code src} cannot be obtained.
 */
protected Collection<Path> recordPublisherOutputDirs(Path src, Path dst, int branchId) throws IOException {

    // The status is read from src rather than dst because dst doesn't exist yet at this point.
    boolean srcIsDirectory = this.writerFileSystemByBranches.get(branchId).getFileStatus(src).isDirectory();
    Path outputDir = srcIsDirectory ? dst : dst.getParent();
    return ImmutableList.<Path>of(outputDir);
}

From source file:gobblin.publisher.BaseDataPublisher.java

License:Apache License

/**
 * Publish metadata to a set of paths/* w w w .j  a v a 2 s .  co m*/
 */
private void publishMetadata(String metadataValue, int branchId, Collection<Path> pathsToPublish)
        throws IOException {
    for (Path metadataOutputPath : pathsToPublish) {
        try {
            if (metadataOutputPath == null) {
                LOG.info("Metadata output path not set for branch " + String.valueOf(branchId)
                        + ", not publishing.");
                continue;
            }

            FileSystem fs = this.metaDataWriterFileSystemByBranches.get(branchId);

            if (!fs.exists(metadataOutputPath.getParent())) {
                WriterUtils.mkdirsWithRecursivePermission(fs, metadataOutputPath,
                        this.permissions.get(branchId));
            }

            //Delete the file if metadata already exists
            if (fs.exists(metadataOutputPath)) {
                HadoopUtils.deletePath(fs, metadataOutputPath, false);
            }
            LOG.info("Writing metadata for branch " + String.valueOf(branchId) + " to "
                    + metadataOutputPath.toString());
            try (FSDataOutputStream outputStream = fs.create(metadataOutputPath)) {
                outputStream.write(metadataValue.getBytes(StandardCharsets.UTF_8));
            }
        } catch (IOException e) {
            LOG.error("Metadata file is not generated: " + e, e);
        }
    }
}