Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find example usage for the org.apache.hadoop.fs.FileSystem method makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path so that it uses this FileSystem's scheme and authority and, if relative, is made absolute.
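
A minimal sketch of what qualification does, run against the local FileSystem (the path names here are invented for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // A relative path: no scheme, no authority, not anchored at the root.
        Path relative = new Path("data/input");
        // makeQualified resolves it against fs.getWorkingDirectory() and
        // prefixes the FileSystem's scheme, e.g. file:/<cwd>/data/input
        System.out.println(fs.makeQualified(relative));
    }
}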

Usage

From source file:etl.cmd.test.XFsTestCase.java

License:Apache License

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}

From source file:gobblin.config.store.hdfs.SimpleHdfsConfigureStoreFactoryTest.java

License:Apache License

@Test
public void testConfiguration() throws Exception {
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    Path testRoot = localFS.makeQualified(new Path("testConfiguration"));
    Path configRoot = localFS.makeQualified(new Path(testRoot, "dir2"));
    Path configStoreRoot = new Path(configRoot, SimpleHDFSConfigStore.CONFIG_STORE_NAME);
    Assert.assertTrue(localFS.mkdirs(configStoreRoot));
    try {
        Config confConf1 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(configRoot.toString()));
        SimpleHDFSConfigStoreFactory confFactory = new SimpleHDFSConfigStoreFactory(confConf1);
        Assert.assertTrue(confFactory.hasDefaultStoreURI());
        Assert.assertEquals(confFactory.getDefaultStoreURI(), configRoot.toUri());
        Assert.assertEquals(confFactory.getPhysicalScheme(), "file");
        Assert.assertEquals(confFactory.getDefaultRootDir().toString(),
                "file:" + System.getProperty("user.home"));

        // Valid path
        SimpleHDFSConfigStore store1 = confFactory.createConfigStore(new URI("simple-file:/d"));
        Assert.assertEquals(store1.getStoreURI().getScheme(), confFactory.getScheme());
        Assert.assertEquals(store1.getStoreURI().getAuthority(),
                confFactory.getDefaultStoreURI().getAuthority());
        Assert.assertEquals(store1.getStoreURI().getPath(), confFactory.getDefaultStoreURI().getPath());

        // Invalid path
        Config confConf2 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(testRoot.toString()));
        try {
            new SimpleHDFSConfigStoreFactory(confConf2);
            Assert.fail("Exception expected");
        } catch (IllegalArgumentException e) {
            Assert.assertTrue(e.getMessage().contains("Path does not appear to be a config store root"));
        }

        // Empty path
        Config confConf3 = ConfigFactory.empty().withValue(SimpleHDFSConfigStoreFactory.DEFAULT_STORE_URI_KEY,
                ConfigValueFactory.fromAnyRef(""));
        try {
            new SimpleHDFSConfigStoreFactory(confConf3);
            Assert.fail("Exception expected");
        } catch (IllegalArgumentException e) {
            Assert.assertTrue(e.getMessage().contains("Default store URI should be non-empty"));
        }
    } finally {
        localFS.delete(testRoot, true);
    }
}

From source file:gobblin.data.management.copy.CopyContext.java

License:Apache License

/**
 * Get cached {@link FileStatus}.
 */
public Optional<FileStatus> getFileStatus(final FileSystem fs, final Path path) throws IOException {
    try {
        return this.fileStatusCache.get(fs.makeQualified(path), new Callable<Optional<FileStatus>>() {
            @Override
            public Optional<FileStatus> call() throws Exception {
                try {
                    return Optional.of(fs.getFileStatus(path));
                } catch (FileNotFoundException fnfe) {
                    return Optional.absent();
                }
            }
        });
    } catch (ExecutionException ee) {
        throw new IOException(ee.getCause());
    }
}
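
Here makeQualified normalizes the cache key, so different spellings of the same location share one cache entry. A minimal standalone sketch of that normalization (local FileSystem, arbitrary paths):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedCacheKeyDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path relative = new Path("reports/2024");
        Path absolute = new Path(fs.getWorkingDirectory(), "reports/2024");
        // Both forms qualify to the same Path, so a cache keyed on the
        // qualified path treats them as one entry.
        System.out.println(fs.makeQualified(relative).equals(fs.makeQualified(absolute))); // true
    }
}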

From source file:gobblin.data.management.copy.hive.HiveTargetPathHelper.java

License:Apache License

/**
 * Compute the target {@link Path} for a file or directory copied by Hive distcp.
 *
 * <p>
 *   The target locations of data files for this table depend on the values of the resolved table root (e.g.
 *   the value of {@link #COPY_TARGET_TABLE_ROOT} with tokens replaced) and {@link #RELOCATE_DATA_FILES_KEY}:
 *   * if {@link #RELOCATE_DATA_FILES_KEY} is true, then origin file /path/to/file/myFile will be written to
 *     /resolved/table/root/<partition>/myFile
 *   * if {@link #COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED} and {@link #COPY_TARGET_TABLE_PREFIX_REPLACEMENT} are defined,
 *     then the specified prefix in each file will be replaced by the specified replacement.
 *   * otherwise, if the resolved table root is defined (e.g. {@link #COPY_TARGET_TABLE_ROOT} is defined in the
 *     properties), we define:
 *     origin_table_root := the deepest non-glob ancestor of table.getSc().getLocation(), iff getLocation() points to
 *                           a single glob (e.g. /path/to/*&#47;files -> /path/to). If getLocation() contains no glob
 *                           or multiple globs, the job will fail.
 *     relative_path := path of the file relative to origin_table_root. If the path of the file is not a descendant
 *                      of origin_table_root, the job will fail.
 *     target_path := /resolved/table/root/relative/path
 *     This mode is useful when moving a table with a complicated directory structure to a different base directory.
 *   * otherwise the target is identical to the origin path.
 * </p>
 *
 *
 * @param sourcePath Source path to be transformed.
 * @param targetFs target {@link FileSystem}
 * @param partition partition this file belongs to.
 * @param isConcreteFile true if this is a path to an existing file in HDFS.
 */
public Path getTargetPath(Path sourcePath, FileSystem targetFs, Optional<Partition> partition,
        boolean isConcreteFile) {
    if (this.relocateDataFiles) {
        Preconditions.checkArgument(this.targetTableRoot.isPresent(), "Must define %s to relocate data files.",
                COPY_TARGET_TABLE_ROOT);
        Path path = this.targetTableRoot.get();
        if (partition.isPresent()) {
            path = addPartitionToPath(path, partition.get());
        }
        if (!isConcreteFile) {
            return targetFs.makeQualified(path);
        }
        return targetFs.makeQualified(new Path(path, sourcePath.getName()));
    }

    // Both prefixes must be present at the same time;
    // they cannot be used together with the option {@link #COPY_TARGET_TABLE_ROOT}.
    if (this.targetTablePrefixTobeReplaced.isPresent() || this.targetTablePrefixReplacement.isPresent()) {
        Preconditions.checkState(this.targetTablePrefixTobeReplaced.isPresent(),
                String.format("Must specify both %s option and %s option together",
                        COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, COPY_TARGET_TABLE_PREFIX_REPLACEMENT));
        Preconditions.checkState(this.targetTablePrefixReplacement.isPresent(),
                String.format("Must specify both %s option and %s option together",
                        COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, COPY_TARGET_TABLE_PREFIX_REPLACEMENT));

        Preconditions.checkState(!this.targetTableRoot.isPresent(),
                String.format("Can not specify the option %s with option %s ", COPY_TARGET_TABLE_ROOT,
                        COPY_TARGET_TABLE_PREFIX_REPLACEMENT));

        Path targetPathWithoutSchemeAndAuthority = HiveCopyEntityHelper.replacedPrefix(sourcePath,
                this.targetTablePrefixTobeReplaced.get(), this.targetTablePrefixReplacement.get());
        return targetFs.makeQualified(targetPathWithoutSchemeAndAuthority);
    } else if (this.targetTableRoot.isPresent()) {
        Preconditions.checkArgument(this.dataset.getTableRootPath().isPresent(),
                "Cannot move paths to a new root unless table has exactly one location.");
        Preconditions.checkArgument(PathUtils.isAncestor(this.dataset.getTableRootPath().get(), sourcePath),
                "When moving paths to a new root, all locations must be descendants of the table root location. "
                        + "Table root location: %s, file location: %s.",
                this.dataset.getTableRootPath(), sourcePath);

        Path relativePath = PathUtils.relativizePath(sourcePath, this.dataset.getTableRootPath().get());
        return targetFs.makeQualified(new Path(this.targetTableRoot.get(), relativePath));
    } else {
        return targetFs.makeQualified(PathUtils.getPathWithoutSchemeAndAuthority(sourcePath));
    }
}
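
The "resolved table root" branch above reduces to: relativize the source against the table root, re-root it under the target root, then qualify on the target FileSystem. A standalone sketch of that arithmetic (the paths are made up, and the string-based relativization is a stand-in for Gobblin's PathUtils.relativizePath):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TableRootRelocationSketch {
    public static void main(String[] args) throws Exception {
        FileSystem targetFs = FileSystem.getLocal(new Configuration());

        Path tableRoot = new Path("/data/warehouse/my_table");
        Path sourceFile = new Path("/data/warehouse/my_table/part=1/file.avro");
        Path newRoot = new Path("/backup/warehouse/my_table");

        // relative_path := source path relative to the table root
        String relative = sourceFile.toUri().getPath()
                .substring(tableRoot.toUri().getPath().length() + 1); // "part=1/file.avro"

        // target_path := new root + relative path, qualified on the target FS
        Path target = targetFs.makeQualified(new Path(newRoot, relative));
        System.out.println(target); // file:/backup/warehouse/my_table/part=1/file.avro
    }
}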

From source file:gobblin.data.management.copy.predicates.RegistrationTimeSkipPredicateTest.java

License:Apache License

@Test
public void test() throws Exception {

    Path partition1Path = new Path("/path/to/partition1");
    long modTime = 100000;

    CopyContext copyContext = new CopyContext();
    CopyConfiguration copyConfiguration = Mockito.mock(CopyConfiguration.class);
    Mockito.doReturn(copyContext).when(copyConfiguration).getCopyContext();
    HiveDataset dataset = Mockito.mock(HiveDataset.class);
    FileSystem fs = Mockito.spy(FileSystem.getLocal(new Configuration()));
    FileStatus status = new FileStatus(1, false, 1, 1, modTime, partition1Path);
    Path qualifiedPath = fs.makeQualified(partition1Path);
    Mockito.doReturn(status).when(fs).getFileStatus(qualifiedPath);
    Mockito.doReturn(status).when(fs).getFileStatus(partition1Path);
    Mockito.doReturn(fs).when(dataset).getFs();

    HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class);
    Mockito.doReturn(copyConfiguration).when(helper).getConfiguration();
    Mockito.doReturn(dataset).when(helper).getDataset();

    RegistrationTimeSkipPredicate predicate = new RegistrationTimeSkipPredicate(helper);

    // partition exists, but registration time before modtime => don't skip
    HivePartitionFileSet pc = createPartitionCopy(partition1Path, modTime - 1, true);
    Assert.assertFalse(predicate.apply(pc));

    // partition exists, registration time equal modtime => don't skip
    pc = createPartitionCopy(partition1Path, modTime, true);
    Assert.assertFalse(predicate.apply(pc));

    // partition exists, registration time larger modtime => do skip
    pc = createPartitionCopy(partition1Path, modTime + 1, true);
    Assert.assertTrue(predicate.apply(pc));

    // partition doesn't exist => don't skip
    pc = createPartitionCopy(partition1Path, modTime + 1, false);
    Assert.assertFalse(predicate.apply(pc));

    // partition exists but is not annotated => don't skip
    pc = createPartitionCopy(partition1Path, modTime + 1, true);
    pc.getExistingTargetPartition().get().getParameters().clear();
    Assert.assertFalse(predicate.apply(pc));

}
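
Note how the test stubs getFileStatus for both the raw and the qualified path: CopyContext.getFileStatus (shown earlier) keys its cache on fs.makeQualified(path) but fetches the status with the original path inside the Callable, so both spellings must be stubbed for the spy to behave.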

From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java

License:Apache License

private void calculateDatasetURN() {
    EndPoint e = this.copyRoute.getCopyTo();
    if (e instanceof HadoopFsEndPoint) {
        HadoopFsEndPoint copyTo = (HadoopFsEndPoint) e;
        Configuration conf = HadoopUtils.newConfiguration();
        try {
            FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);
            this.datasetURN = copyToFs.makeQualified(copyTo.getDatasetPath()).toString();
        } catch (IOException e1) {
            // ignored
        }
    } else {
        // non-Hadoop endpoints fall back to the endpoint's string representation
        this.datasetURN = e.toString();
    }
}

From source file:gobblin.data.management.copy.replication.ConfigBasedDataset.java

License:Apache License

@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
        CopyConfiguration copyConfiguration) throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }

    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();

    boolean watermarkMetadataCopied = false;

    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip files that already exist on the target with the same length and a newer timestamp
        if (copyToFileMap.containsKey(newPath)
                && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // need to remove those files in the target File System
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }

            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());

        }

        // clean up already checked paths
        copyToFileMap.remove(newPath);
    }

    // delete paths in the target directory that do NOT exist on the source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths,
                this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles
                .add(new PostPublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(),
                        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(),
                                copyTo.getDatasetPath(), copyFrom.getWatermark().get()),
                        1));
    }

    return copyableFiles;
}
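
A point worth noting above: target files are keyed by their scheme-and-authority-free paths, so the same relative location compares equal across clusters. Hadoop's static Path.getPathWithoutSchemeAndAuthority (which the Gobblin PathUtils helper used above mirrors) demonstrates this; the host names below are invented:

import org.apache.hadoop.fs.Path;

public class CrossClusterPathCompare {
    public static void main(String[] args) {
        Path onSource = new Path("hdfs://source-nn:8020/data/set/file1");
        Path onTarget = new Path("hdfs://target-nn:8020/data/set/file1");

        // Raw paths differ because of scheme and authority...
        System.out.println(onSource.equals(onTarget)); // false

        // ...but stripped, they compare equal, which is what lets the
        // copyToFileMap lookups in the method above find matches.
        System.out.println(Path.getPathWithoutSchemeAndAuthority(onSource)
                .equals(Path.getPathWithoutSchemeAndAuthority(onTarget))); // true
    }
}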

From source file:gobblin.data.management.copy.replication.HadoopFsEndPointDataset.java

License:Apache License

public HadoopFsEndPointDataset(HadoopFsEndPoint endPoint) {
    this.endPoint = endPoint;
    Configuration conf = HadoopUtils.newConfiguration();
    try {
        FileSystem fs = FileSystem.get(this.endPoint.getFsURI(), conf);
        qualifiedDatasetRoot = fs.makeQualified(this.endPoint.getDatasetPath());
    } catch (IOException e1) {
        // ignored
        qualifiedDatasetRoot = this.endPoint.getDatasetPath();
    }
}

From source file:gobblin.data.management.trash.MockTrashTest.java

License:Apache License

@Test
public void MockTrashTest() throws IOException {

    FileSystem fs = mock(FileSystem.class);

    Path homeDirectory = new Path("/home/directory");
    when(fs.getHomeDirectory()).thenReturn(homeDirectory);
    when(fs.makeQualified(any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    Trash trash = new MockTrash(fs, new Properties(), "user");

    Assert.assertTrue(trash.moveToTrash(new Path("/some/path")));

    verify(fs).getHomeDirectory();
    verify(fs).makeQualified(any(Path.class));
    verifyNoMoreInteractions(fs);

}
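
The anonymous Answer above is an identity stub: it echoes the Path argument back unchanged. Assuming Mockito 1.9.5 or later is on the classpath, the same stubbing can be written more compactly as a drop-in replacement:

import static org.mockito.AdditionalAnswers.returnsFirstArg;

// Equivalent to the hand-written Answer<Path> above: return the
// first argument (the Path) as the "qualified" path.
when(fs.makeQualified(any(Path.class))).thenAnswer(returnsFirstArg());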

From source file:gobblin.data.management.trash.Trash.java

License:Apache License

/**
 * Create location of Trash directory. Parsed from props at key {@link #TRASH_LOCATION_KEY}, defaulting to
 * /home/directory/_GOBBLIN_TRASH.
 * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found.
 * @param props {@link java.util.Properties} containing trash configuration.
 * @param user If the trash location contains the token $USER, the token will be replaced by the value of user.
 * @return {@link org.apache.hadoop.fs.Path} for trash directory.
 * @throws java.io.IOException
 */
protected Path createTrashLocation(FileSystem fs, Properties props, String user) throws IOException {
    Path trashLocation;
    if (props.containsKey(TRASH_LOCATION_KEY)) {
        trashLocation = new Path(props.getProperty(TRASH_LOCATION_KEY).replaceAll("\\$USER", user));
    } else {
        trashLocation = new Path(fs.getHomeDirectory(), DEFAULT_TRASH_DIRECTORY);
        LOG.info("Using default trash location at " + trashLocation);
    }
    if (!trashLocation.isAbsolute()) {
        throw new IllegalArgumentException(
                "Trash location must be absolute. Found " + trashLocation.toString());
    }
    Path qualifiedTrashLocation = fs.makeQualified(trashLocation);
    ensureTrashLocationExists(fs, qualifiedTrashLocation);
    return qualifiedTrashLocation;
}
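
Pulled out of context, the $USER substitution and qualification described in the javadoc come down to a few lines. In this sketch the property key "gobblin.trash.location" is a stand-in for TRASH_LOCATION_KEY, and the user name is invented:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TrashLocationSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Properties props = new Properties();
        // "gobblin.trash.location" stands in for TRASH_LOCATION_KEY.
        props.setProperty("gobblin.trash.location", "/user/$USER/_GOBBLIN_TRASH");

        Path trashLocation = new Path(
                props.getProperty("gobblin.trash.location").replaceAll("\\$USER", "alice"));
        // Qualifying makes later equality checks against fully-qualified
        // paths (e.g. results of listStatus) line up.
        System.out.println(fs.makeQualified(trashLocation));
        // -> file:/user/alice/_GOBBLIN_TRASH
    }
}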