List of usage examples for org.apache.hadoop.fs.Path#getParent()
public Path getParent()
From source file:gobblin.data.management.retention.FsCleanableHelperTest.java
License:Apache License
@Test public void testDeleteEmptyDirs() throws Exception { Properties props = new Properties(); props.setProperty(FsCleanableHelper.SKIP_TRASH_KEY, Boolean.toString(true)); FsCleanableHelper fsCleanableHelper = new FsCleanableHelper(this.fs, props, ConfigFactory.empty(), log); FileSystemDataset fsDataset = mock(FileSystemDataset.class); Path datasetRoot = new Path(testTempPath, "dataset1"); when(fsDataset.datasetRoot()).thenReturn(datasetRoot); // To delete/*from w w w . j ava 2s .c o m*/ Path deleted1 = new Path(datasetRoot, "2016/01/01/13"); Path deleted2 = new Path(datasetRoot, "2016/01/01/14"); Path deleted3 = new Path(datasetRoot, "2016/01/02/15"); // Do not delete Path notDeleted1 = new Path(datasetRoot, "2016/01/02/16"); this.fs.mkdirs(deleted1); this.fs.mkdirs(deleted2); this.fs.mkdirs(deleted3); this.fs.mkdirs(notDeleted1); // Make sure all paths are created Assert.assertTrue(this.fs.exists(deleted1)); Assert.assertTrue(this.fs.exists(deleted2)); Assert.assertTrue(this.fs.exists(deleted3)); Assert.assertTrue(this.fs.exists(notDeleted1)); List<FileSystemDatasetVersion> deletableVersions = ImmutableList.<FileSystemDatasetVersion>of( new MockFileSystemDatasetVersion(deleted1), new MockFileSystemDatasetVersion(deleted2), new MockFileSystemDatasetVersion(deleted3)); fsCleanableHelper.clean(deletableVersions, fsDataset); // Verify versions are deleted Assert.assertFalse(this.fs.exists(deleted1)); Assert.assertFalse(this.fs.exists(deleted2)); Assert.assertFalse(this.fs.exists(deleted3)); // Verify versions are not deleted Assert.assertTrue(this.fs.exists(notDeleted1)); // Verify empty parent dir "2016/01/01" is deleted Assert.assertFalse(this.fs.exists(deleted1.getParent())); // Verify non empty parent dir "2016/01/02" exists Assert.assertTrue(this.fs.exists(notDeleted1.getParent())); }
From source file:gobblin.data.management.trash.Trash.java
License:Apache License
protected void ensureTrashLocationExists(FileSystem fs, Path trashLocation) throws IOException { if (fs.exists(trashLocation)) { if (!fs.isDirectory(trashLocation)) { throw new IOException(String.format("Trash location %s is not a directory.", trashLocation)); }//ww w . j av a 2 s . co m if (!fs.exists(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) { // If trash identifier file is not present, directory might have been created by user. // Add trash identifier file only if directory is empty. if (fs.listStatus(trashLocation).length > 0) { throw new IOException(String.format( "Trash directory %s exists, but it does not look like a trash directory. " + "File: %s missing and directory is not empty.", trashLocation, TRASH_IDENTIFIER_FILE)); } else if (!fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) { throw new IOException(String.format("Failed to create file %s in existing trash directory %s.", TRASH_IDENTIFIER_FILE, trashLocation)); } } } else if (!(fs.mkdirs(trashLocation.getParent(), ALL_PERM) && fs.mkdirs(trashLocation, PERM) && fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE)))) { // Failed to create directory or create trash identifier file. throw new IOException("Failed to create trash directory at " + trashLocation.toString()); } }
From source file:gobblin.data.management.trash.Trash.java
License:Apache License
/**
 * Moves a path into the trash directory, mirroring the path's absolute location underneath it.
 *
 * <p>A relative {@code path} is resolved against the file system's working directory first. If
 * the target's parent directory is missing in the trash it is created; otherwise, if the target
 * itself already exists, the target name is suffixed with {@code _<current time millis>}.
 *
 * @param path {@link org.apache.hadoop.fs.FileSystem} path to move to trash.
 * @return the result of renaming the resolved path to its trash target.
 * @throws IOException if a file system operation fails.
 */
@Override
public boolean moveToTrash(Path path) throws IOException {
  Path source;
  if (path.isAbsolute()) {
    source = path;
  } else {
    source = new Path(this.fs.getWorkingDirectory(), path);
  }

  Path trashTarget = PathUtils.mergePaths(this.trashLocation, source);
  Path trashTargetParent = trashTarget.getParent();

  if (!this.fs.exists(trashTargetParent)) {
    this.fs.mkdirs(trashTargetParent);
  } else if (this.fs.exists(trashTarget)) {
    // Name clash inside the trash: disambiguate with a timestamp suffix.
    trashTarget = trashTarget.suffix("_" + System.currentTimeMillis());
  }

  return this.fs.rename(source, trashTarget);
}
From source file:gobblin.data.management.trash.TrashTest.java
License:Apache License
@Test public void testMoveToTrashExistingFile() throws IOException { TrashTestBase trash = new TrashTestBase(new Properties()); String fileName = "delete"; Path pathToDelete = new Path("/path/to", fileName); Pattern expectedNamePattern = Pattern.compile("^" + fileName + "_[0-9]+$"); final List<Pair<Path, Path>> movedPaths = Lists.newArrayList(); when(trash.fs.exists(any(Path.class))).thenReturn(true); when(trash.fs.rename(any(Path.class), any(Path.class))).thenAnswer(new Answer<Boolean>() { @Override//from w w w. j a v a 2 s . com public Boolean answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); movedPaths.add(new Pair<Path, Path>((Path) args[0], (Path) args[1])); return true; } }); Assert.assertTrue(trash.trash.moveToTrash(pathToDelete)); verify(trash.fs, times(0)).mkdirs(any(Path.class)); Assert.assertEquals(movedPaths.size(), 1); Assert.assertTrue(movedPaths.get(0).first().equals(pathToDelete)); Assert.assertTrue( movedPaths.get(0).second().getParent().toString().endsWith(pathToDelete.getParent().toString())); Assert.assertTrue(expectedNamePattern.matcher(movedPaths.get(0).second().getName()).matches()); Assert.assertTrue(movedPaths.get(0).second().getParent().getParent().getParent() .equals(trash.trash.getTrashLocation())); }
From source file:gobblin.data.management.util.PathUtilsTest.java
License:Apache License
/**
 * Exercises {@code PathUtils.isAncestor} with both an absolute and a relative candidate ancestor:
 * a descendant and the path itself are accepted; unrelated paths and the candidate's own parent
 * are rejected.
 */
@Test
public void testIsAncestor() throws Exception {
  // Absolute candidate ancestor.
  Path absoluteBase = new Path("/some/path");
  Path absoluteChild = new Path(absoluteBase, "more/elements");

  Assert.assertTrue(PathUtils.isAncestor(absoluteBase, absoluteChild));
  Assert.assertTrue(PathUtils.isAncestor(absoluteBase, absoluteBase));
  Assert.assertFalse(PathUtils.isAncestor(absoluteBase, new Path("/unrelated/path")));
  Assert.assertFalse(PathUtils.isAncestor(absoluteBase, new Path("relative/path")));
  Assert.assertFalse(PathUtils.isAncestor(absoluteBase, absoluteBase.getParent()));

  // Relative candidate ancestor.
  Path relativeBase = new Path("relative/ancestor");
  Path relativeChild = new Path(relativeBase, "more/elements");

  Assert.assertTrue(PathUtils.isAncestor(relativeBase, relativeChild));
  Assert.assertTrue(PathUtils.isAncestor(relativeBase, relativeBase));
  Assert.assertFalse(PathUtils.isAncestor(relativeBase, new Path("/unrelated/path")));
  Assert.assertFalse(PathUtils.isAncestor(relativeBase, new Path("relative/path")));
  Assert.assertFalse(PathUtils.isAncestor(relativeBase, relativeBase.getParent()));
}
From source file:gobblin.metastore.FsStateStore.java
License:Apache License
@Override public void createAlias(String storeName, String original, String alias) throws IOException { Path originalTablePath = new Path(new Path(this.storeRootDir, storeName), original); if (!this.fs.exists(originalTablePath)) { throw new IOException( String.format("State file %s does not exist for table %s", originalTablePath, original)); }// w ww. j a va 2 s .c o m Path aliasTablePath = new Path(new Path(this.storeRootDir, storeName), alias); Path tmpAliasTablePath = new Path(aliasTablePath.getParent(), new Path(TMP_FILE_PREFIX, aliasTablePath.getName())); // Make a copy of the original table as a work-around because // Hadoop version 1.2.1 has no support for symlink yet. HadoopUtils.copyFile(this.fs, originalTablePath, this.fs, aliasTablePath, tmpAliasTablePath, true, this.conf); }
From source file:gobblin.publisher.BaseDataPublisher.java
License:Apache License
protected void publishData(WorkUnitState state, int branchId, boolean publishSingleTaskData, Set<Path> writerOutputPathsMoved) throws IOException { // Get a ParallelRunner instance for moving files in parallel ParallelRunner parallelRunner = this.getParallelRunner(this.writerFileSystemByBranches.get(branchId)); // The directory where the workUnitState wrote its output data. Path writerOutputDir = WriterUtils.getWriterOutputDir(state, this.numBranches, branchId); if (!this.writerFileSystemByBranches.get(branchId).exists(writerOutputDir)) { LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId())); return;/*from w w w . ja va 2 s . c o m*/ } // The directory where the final output directory for this job will be placed. // It is a combination of DATA_PUBLISHER_FINAL_DIR and WRITER_FILE_PATH. Path publisherOutputDir = getPublisherOutputDir(state, branchId); if (publishSingleTaskData) { // Create final output directory WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId), publisherOutputDir, this.permissions.get(branchId)); addSingleTaskWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId, parallelRunner); } else { if (writerOutputPathsMoved.contains(writerOutputDir)) { // This writer output path has already been moved for another task of the same extract // If publishSingleTaskData=true, writerOutputPathMoved is ignored. return; } if (this.publisherFileSystemByBranches.get(branchId).exists(publisherOutputDir)) { // The final output directory already exists, check if the job is configured to replace it. // If publishSingleTaskData=true, final output directory is never replaced. boolean replaceFinalOutputDir = this.getState() .getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch( ConfigurationKeys.DATA_PUBLISHER_REPLACE_FINAL_DIR, this.numBranches, branchId)); // If the final output directory is not configured to be replaced, put new data to the existing directory. 
if (!replaceFinalOutputDir) { addWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId, parallelRunner); writerOutputPathsMoved.add(writerOutputDir); return; } // Delete the final output directory if it is configured to be replaced LOG.info("Deleting publisher output dir " + publisherOutputDir); this.publisherFileSystemByBranches.get(branchId).delete(publisherOutputDir, true); } else { // Create the parent directory of the final output directory if it does not exist WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId), publisherOutputDir.getParent(), this.permissions.get(branchId)); } movePath(parallelRunner, state, writerOutputDir, publisherOutputDir, branchId); writerOutputPathsMoved.add(writerOutputDir); } }
From source file:gobblin.publisher.BaseDataPublisher.java
License:Apache License
/**
 * Moves each output file recorded for a single task from the writer output directory to the same
 * relative location under the publisher output directory.
 *
 * <p>The files are read from the branch-specific {@code WRITER_FINAL_OUTPUT_FILE_PATHS} property.
 * If the property is missing nothing is done; files that no longer exist are skipped with a
 * warning.
 *
 * @param writerOutputDir directory the task wrote into.
 * @param publisherOutputDir directory to publish into.
 * @param workUnitState state of the task being published.
 * @param branchId branch index.
 * @param parallelRunner runner passed on to {@code movePath}.
 * @throws IOException if a file system operation fails.
 */
protected void addSingleTaskWriterOutputToExistingDir(Path writerOutputDir, Path publisherOutputDir,
    WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {
  String outputFilesKey = ForkOperatorUtils.getPropertyNameForBranch(
      ConfigurationKeys.WRITER_FINAL_OUTPUT_FILE_PATHS, this.numBranches, branchId);

  if (!workUnitState.contains(outputFilesKey)) {
    LOG.warn("Missing property " + outputFilesKey + ". This task may have pulled no data.");
    return;
  }

  for (String taskOutputFile : workUnitState.getPropAsSet(outputFilesKey)) {
    Path taskOutputPath = new Path(taskOutputFile);
    if (!this.writerFileSystemByBranches.get(branchId).exists(taskOutputPath)) {
      LOG.warn("Task output file " + taskOutputFile + " doesn't exist.");
      continue;
    }

    // Path of the file relative to the writer output dir (the +1 skips the separator).
    // NOTE(review): if taskOutputFile does not contain the writer dir, indexOf returns -1 and
    // the offset below is meaningless — confirm callers guarantee containment.
    String writerDirName = writerOutputDir.toString();
    int suffixStart = taskOutputFile.indexOf(writerDirName) + writerDirName.length() + 1;
    String relativeFile = taskOutputFile.substring(suffixStart);

    Path publisherOutputPath = new Path(publisherOutputDir, relativeFile);
    WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
        publisherOutputPath.getParent(), this.permissions.get(branchId));

    movePath(parallelRunner, workUnitState, taskOutputPath, publisherOutputPath, branchId);
  }
}
From source file:gobblin.publisher.BaseDataPublisher.java
License:Apache License
protected Collection<Path> recordPublisherOutputDirs(Path src, Path dst, int branchId) throws IOException { // Getting file status from src rather than dst, because at this time dst doesn't yet exist. // If src is a dir, add dst to the set of paths. Otherwise, add dst's parent. if (this.writerFileSystemByBranches.get(branchId).getFileStatus(src).isDirectory()) { return ImmutableList.<Path>of(dst); }//from w w w. jav a 2s. co m return ImmutableList.<Path>of(dst.getParent()); }
From source file:gobblin.publisher.BaseDataPublisher.java
License:Apache License
/** * Publish metadata to a set of paths/* w w w .j a v a 2 s . co m*/ */ private void publishMetadata(String metadataValue, int branchId, Collection<Path> pathsToPublish) throws IOException { for (Path metadataOutputPath : pathsToPublish) { try { if (metadataOutputPath == null) { LOG.info("Metadata output path not set for branch " + String.valueOf(branchId) + ", not publishing."); continue; } FileSystem fs = this.metaDataWriterFileSystemByBranches.get(branchId); if (!fs.exists(metadataOutputPath.getParent())) { WriterUtils.mkdirsWithRecursivePermission(fs, metadataOutputPath, this.permissions.get(branchId)); } //Delete the file if metadata already exists if (fs.exists(metadataOutputPath)) { HadoopUtils.deletePath(fs, metadataOutputPath, false); } LOG.info("Writing metadata for branch " + String.valueOf(branchId) + " to " + metadataOutputPath.toString()); try (FSDataOutputStream outputStream = fs.create(metadataOutputPath)) { outputStream.write(metadataValue.getBytes(StandardCharsets.UTF_8)); } } catch (IOException e) { LOG.error("Metadata file is not generated: " + e, e); } } }