List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
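Before the examples, a minimal self-contained sketch of what makeQualified does: it resolves a relative Path against the FileSystem's working directory and fills in any missing scheme and authority from the FileSystem's URI. The class name MakeQualifiedDemo is invented for illustration, and the exact output depends on your working directory.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // A bare relative path has no scheme, authority, or base directory.
        Path relative = new Path("data/input.txt");
        // makeQualified resolves it against the working directory and adds the
        // FileSystem's scheme, e.g. file:/current/working/dir/data/input.txt
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}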
From source file:etl.cmd.test.XFsTestCase.java
License:Apache License
private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}
From source file:gobblin.config.store.hdfs.SimpleHdfsConfigureStoreFactoryTest.java
License:Apache License
@Test
public void testConfiguration() throws Exception {
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    Path testRoot = localFS.makeQualified(new Path("testConfiguration"));
    Path configRoot = localFS.makeQualified(new Path(testRoot, "dir2"));
    Path configStoreRoot = new Path(configRoot, SimpleHDFSConfigStore.CONFIG_STORE_NAME);
    Assert.assertTrue(localFS.mkdirs(configStoreRoot));
    try {
        Config confConf1 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(configRoot.toString()));
        SimpleHDFSConfigStoreFactory confFactory = new SimpleHDFSConfigStoreFactory(confConf1);
        Assert.assertTrue(confFactory.hasDefaultStoreURI());
        Assert.assertEquals(confFactory.getDefaultStoreURI(), configRoot.toUri());
        Assert.assertEquals(confFactory.getPhysicalScheme(), "file");
        Assert.assertEquals(confFactory.getDefaultRootDir().toString(), "file:" + System.getProperty("user.home"));

        // Valid path
        SimpleHDFSConfigStore store1 = confFactory.createConfigStore(new URI("simple-file:/d"));
        Assert.assertEquals(store1.getStoreURI().getScheme(), confFactory.getScheme());
        Assert.assertEquals(store1.getStoreURI().getAuthority(), confFactory.getDefaultStoreURI().getAuthority());
        Assert.assertEquals(store1.getStoreURI().getPath(), confFactory.getDefaultStoreURI().getPath());

        // Invalid path
        Config confConf2 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(testRoot.toString()));
        try {
            new SimpleHDFSConfigStoreFactory(confConf2);
            Assert.fail("Exception expected");
        } catch (IllegalArgumentException e) {
            Assert.assertTrue(e.getMessage().contains("Path does not appear to be a config store root"));
        }

        // Empty path
        Config confConf3 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(""));
        try {
            new SimpleHDFSConfigStoreFactory(confConf3);
            Assert.fail("Exception expected");
        } catch (IllegalArgumentException e) {
            Assert.assertTrue(e.getMessage().contains("Default store URI should be non-empty"));
        }
    } finally {
        localFS.delete(testRoot, true);
    }
}
From source file:gobblin.data.management.copy.CopyContext.java
License:Apache License
/**
 * Get cached {@link FileStatus}.
 */
public Optional<FileStatus> getFileStatus(final FileSystem fs, final Path path) throws IOException {
    try {
        return this.fileStatusCache.get(fs.makeQualified(path), new Callable<Optional<FileStatus>>() {
            @Override
            public Optional<FileStatus> call() throws Exception {
                try {
                    return Optional.of(fs.getFileStatus(path));
                } catch (FileNotFoundException fnfe) {
                    return Optional.absent();
                }
            }
        });
    } catch (ExecutionException ee) {
        throw new IOException(ee.getCause());
    }
}
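The cache above keys on fs.makeQualified(path) rather than the raw path, so different spellings of the same location collapse to one cache entry. A minimal sketch illustrating that canonicalization (QualifiedKeyDemo is an invented name):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedKeyDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path relative = new Path("some/dir");
        Path absolute = new Path(fs.getWorkingDirectory(), "some/dir");
        // Both spellings qualify to the identical Path, so a cache keyed on
        // the qualified form treats them as the same entry.
        System.out.println(fs.makeQualified(relative).equals(fs.makeQualified(absolute))); // true
    }
}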
From source file:gobblin.data.management.copy.hive.HiveTargetPathHelper.java
License:Apache License
/**
 * Compute the target {@link Path} for a file or directory copied by Hive distcp.
 *
 * <p>
 * The target locations of data files for this table depend on the values of the resolved table root (e.g.
 * the value of {@link #COPY_TARGET_TABLE_ROOT} with tokens replaced) and {@link #RELOCATE_DATA_FILES_KEY}:
 *
 * if {@link #RELOCATE_DATA_FILES_KEY} is true, then origin file /path/to/file/myFile will be written to
 * /resolved/table/root/<partition>/myFile
 *
 * if {@link #COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED} and {@link #COPY_TARGET_TABLE_PREFIX_REPLACEMENT} are defined,
 * then the specified prefix in each file will be replaced by the specified replacement.
 *
 * otherwise, if the resolved table root is defined (e.g. {@link #COPY_TARGET_TABLE_ROOT} is defined in the
 * properties), we define:
 * origin_table_root := the deepest non-glob ancestor of table.getSc().getLocation() iff getLocation() points to
 *                      a single glob. (e.g. /path/to/*/files -> /path/to). If getLocation() contains no glob
 *                      or multiple globs, the job will fail.
 * relative_path := path of the file relative to origin_table_root. If the path of the file is not a descendant
 *                  of origin_table_root, the job will fail.
 * target_path := /resolved/table/root/relative/path
 * This mode is useful when moving a table with a complicated directory structure to a different base directory.
 *
 * otherwise the target is identical to the origin path.
 * </p>
 *
 * @param sourcePath Source path to be transformed.
 * @param targetFs target {@link FileSystem}
 * @param partition partition this file belongs to.
 * @param isConcreteFile true if this is a path to an existing file in HDFS.
 */
public Path getTargetPath(Path sourcePath, FileSystem targetFs, Optional<Partition> partition, boolean isConcreteFile) {
    if (this.relocateDataFiles) {
        Preconditions.checkArgument(this.targetTableRoot.isPresent(), "Must define %s to relocate data files.",
                COPY_TARGET_TABLE_ROOT);
        Path path = this.targetTableRoot.get();
        if (partition.isPresent()) {
            path = addPartitionToPath(path, partition.get());
        }
        if (!isConcreteFile) {
            return targetFs.makeQualified(path);
        }
        return targetFs.makeQualified(new Path(path, sourcePath.getName()));
    }

    // Both prefixes must be present at the same time;
    // they cannot be used together with the {@link #COPY_TARGET_TABLE_ROOT} option.
    if (this.targetTablePrefixTobeReplaced.isPresent() || this.targetTablePrefixReplacement.isPresent()) {
        Preconditions.checkState(this.targetTablePrefixTobeReplaced.isPresent(),
                String.format("Must specify both %s option and %s option together",
                        COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, COPY_TARGET_TABLE_PREFIX_REPLACEMENT));
        Preconditions.checkState(this.targetTablePrefixReplacement.isPresent(),
                String.format("Must specify both %s option and %s option together",
                        COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, COPY_TARGET_TABLE_PREFIX_REPLACEMENT));
        Preconditions.checkState(!this.targetTableRoot.isPresent(),
                String.format("Can not specify the option %s with option %s ", COPY_TARGET_TABLE_ROOT,
                        COPY_TARGET_TABLE_PREFIX_REPLACEMENT));

        Path targetPathWithoutSchemeAndAuthority = HiveCopyEntityHelper.replacedPrefix(sourcePath,
                this.targetTablePrefixTobeReplaced.get(), this.targetTablePrefixReplacement.get());
        return targetFs.makeQualified(targetPathWithoutSchemeAndAuthority);
    } else if (this.targetTableRoot.isPresent()) {
        Preconditions.checkArgument(this.dataset.getTableRootPath().isPresent(),
                "Cannot move paths to a new root unless table has exactly one location.");
        Preconditions.checkArgument(PathUtils.isAncestor(this.dataset.getTableRootPath().get(), sourcePath),
                "When moving paths to a new root, all locations must be descendants of the table root location. "
                        + "Table root location: %s, file location: %s.", this.dataset.getTableRootPath(), sourcePath);

        Path relativePath = PathUtils.relativizePath(sourcePath, this.dataset.getTableRootPath().get());
        return targetFs.makeQualified(new Path(this.targetTableRoot.get(), relativePath));
    } else {
        return targetFs.makeQualified(PathUtils.getPathWithoutSchemeAndAuthority(sourcePath));
    }
}
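In the targetTableRoot branch above, the rebase amounts to relativizing the source path against the table root and re-anchoring it under the new root. PathUtils is Gobblin-internal, but the same computation can be sketched with java.net.URI; RebaseDemo and all paths here are hypothetical, chosen only to show the shape of the transformation:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class RebaseDemo {
    public static void main(String[] args) {
        Path tableRoot = new Path("/data/tables/events");
        Path sourceFile = new Path("/data/tables/events/ds=2016-01-01/part-0.avro");
        Path targetRoot = new Path("/backup/tables/events");

        // Rough equivalent of PathUtils.relativizePath: strip the root prefix.
        URI relative = tableRoot.toUri().relativize(sourceFile.toUri());
        // Rebase under the new root; a real caller would then qualify this
        // against the target FileSystem, as getTargetPath does above.
        Path target = new Path(targetRoot, new Path(relative));
        System.out.println(target); // /backup/tables/events/ds=2016-01-01/part-0.avro
    }
}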
From source file:gobblin.data.management.copy.predicates.RegistrationTimeSkipPredicateTest.java
License:Apache License
@Test
public void test() throws Exception {
    Path partition1Path = new Path("/path/to/partition1");
    long modTime = 100000;

    CopyContext copyContext = new CopyContext();
    CopyConfiguration copyConfiguration = Mockito.mock(CopyConfiguration.class);
    Mockito.doReturn(copyContext).when(copyConfiguration).getCopyContext();
    HiveDataset dataset = Mockito.mock(HiveDataset.class);
    FileSystem fs = Mockito.spy(FileSystem.getLocal(new Configuration()));
    FileStatus status = new FileStatus(1, false, 1, 1, modTime, partition1Path);
    Path qualifiedPath = fs.makeQualified(partition1Path);
    Mockito.doReturn(status).when(fs).getFileStatus(qualifiedPath);
    Mockito.doReturn(status).when(fs).getFileStatus(partition1Path);
    Mockito.doReturn(fs).when(dataset).getFs();
    HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class);
    Mockito.doReturn(copyConfiguration).when(helper).getConfiguration();
    Mockito.doReturn(dataset).when(helper).getDataset();

    RegistrationTimeSkipPredicate predicate = new RegistrationTimeSkipPredicate(helper);

    // Partition exists, but registration time is before the mod time => don't skip
    HivePartitionFileSet pc = createPartitionCopy(partition1Path, modTime - 1, true);
    Assert.assertFalse(predicate.apply(pc));

    // Partition exists, registration time equals the mod time => don't skip
    pc = createPartitionCopy(partition1Path, modTime, true);
    Assert.assertFalse(predicate.apply(pc));

    // Partition exists, registration time is after the mod time => skip
    pc = createPartitionCopy(partition1Path, modTime + 1, true);
    Assert.assertTrue(predicate.apply(pc));

    // Partition doesn't exist => don't skip
    pc = createPartitionCopy(partition1Path, modTime + 1, false);
    Assert.assertFalse(predicate.apply(pc));

    // Partition exists but is not annotated => don't skip
    pc = createPartitionCopy(partition1Path, modTime + 1, true);
    pc.getExistingTargetPartition().get().getParameters().clear();
    Assert.assertFalse(predicate.apply(pc));
}
From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java
License:Apache License
private void calculateDatasetURN() {
    EndPoint e = this.copyRoute.getCopyTo();
    if (e instanceof HadoopFsEndPoint) {
        HadoopFsEndPoint copyTo = (HadoopFsEndPoint) e;
        Configuration conf = HadoopUtils.newConfiguration();
        try {
            FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);
            this.datasetURN = copyToFs.makeQualified(copyTo.getDatasetPath()).toString();
            return;
        } catch (IOException e1) {
            // ignored; fall through to the unqualified fallback below
        }
    }
    this.datasetURN = e.toString();
}
From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java
License:Apache License
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs, CopyConfiguration copyConfiguration)
        throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }

    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();
    boolean watermarkMetadataCopied = false;
    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip copying an identical file
        if (copyToFileMap.containsKey(newPath) && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // need to remove those files in the target file system first
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }
            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());
        }

        // clean up already-checked paths
        copyToFileMap.remove(newPath);
    }

    // delete the paths in the target directory that do NOT exist on the source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths, this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles.add(new PostPublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(),
                new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(), copyTo.getDatasetPath(),
                        copyFrom.getWatermark().get()),
                1));
    }
    return copyableFiles;
}
From source file:gobblin.data.management.copy.replication.HadoopFsEndPointDataset.java
License:Apache License
public HadoopFsEndPointDataset(HadoopFsEndPoint endPoint) {
    this.endPoint = endPoint;
    Configuration conf = HadoopUtils.newConfiguration();
    try {
        FileSystem fs = FileSystem.get(this.endPoint.getFsURI(), conf);
        qualifiedDatasetRoot = fs.makeQualified(this.endPoint.getDatasetPath());
    } catch (IOException e1) {
        // ignored; fall back to the unqualified dataset path
        qualifiedDatasetRoot = this.endPoint.getDatasetPath();
    }
}
From source file:gobblin.data.management.trash.MockTrashTest.java
License:Apache License
@Test
public void MockTrashTest() throws IOException {
    FileSystem fs = mock(FileSystem.class);
    Path homeDirectory = new Path("/home/directory");
    when(fs.getHomeDirectory()).thenReturn(homeDirectory);
    // Stub makeQualified as the identity function.
    when(fs.makeQualified(any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    Trash trash = new MockTrash(fs, new Properties(), "user");
    Assert.assertTrue(trash.moveToTrash(new Path("/some/path")));

    verify(fs).getHomeDirectory();
    verify(fs).makeQualified(any(Path.class));
    verifyNoMoreInteractions(fs);
}
From source file:gobblin.data.management.trash.Trash.java
License:Apache License
/**
 * Create location of Trash directory. Parsed from props at key {@link #TRASH_LOCATION_KEY}, defaulting to
 * /home/directory/_GOBBLIN_TRASH.
 * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found.
 * @param props {@link java.util.Properties} containing trash configuration.
 * @param user If the trash location contains the token $USER, the token will be replaced by the value of user.
 * @return {@link org.apache.hadoop.fs.Path} for trash directory.
 * @throws java.io.IOException
 */
protected Path createTrashLocation(FileSystem fs, Properties props, String user) throws IOException {
    Path trashLocation;
    if (props.containsKey(TRASH_LOCATION_KEY)) {
        trashLocation = new Path(props.getProperty(TRASH_LOCATION_KEY).replaceAll("\\$USER", user));
    } else {
        trashLocation = new Path(fs.getHomeDirectory(), DEFAULT_TRASH_DIRECTORY);
        LOG.info("Using default trash location at " + trashLocation);
    }
    if (!trashLocation.isAbsolute()) {
        throw new IllegalArgumentException("Trash location must be absolute. Found " + trashLocation.toString());
    }

    Path qualifiedTrashLocation = fs.makeQualified(trashLocation);
    ensureTrashLocationExists(fs, qualifiedTrashLocation);
    return qualifiedTrashLocation;
}