Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getUri().

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
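
For orientation, here is a minimal, self-contained sketch of calling getUri() before the full examples below. The path and printed labels are illustrative assumptions, not taken from any of the source files listed under Usage.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Obtain the default FileSystem for this configuration
        // (e.g. file:/// locally, or hdfs://namenode:8020 on a cluster).
        FileSystem fs = FileSystem.get(conf);

        // getUri() returns the URI that identifies this FileSystem.
        URI fsUri = fs.getUri();
        System.out.println("uri       = " + fsUri);
        System.out.println("scheme    = " + fsUri.getScheme());
        System.out.println("authority = " + fsUri.getAuthority());

        // A common pattern in the examples below: fully qualify a path
        // against the same file system (the path itself is hypothetical).
        Path qualified = new Path("/tmp/example").makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("qualified = " + qualified);
    }
}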

Usage

From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java

License:Apache License

public void testArchiving() throws Exception {

    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true);//nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);

    System.out.println("done!");
}

From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java

License:Apache License

private void checkPath(FileSystem fs, Path path) throws PermissionException, NotFoundException, IOException {
    if (_disallowLocalInput) {
        // If we update to Hadoop 1.0, we should use the canonical URI which is definitely unique to each file system. However, the normal one should be, too.
        if (fs.getUri().equals(FileSystem.getLocal(new Configuration()).getUri())) {
            throw new PermissionException("Not allowed to read from the local file system.");
        }
    }

    if (!fs.exists(path))
        throw new NotFoundException("Input path does not exist: " + path.toString());

    if (_enforceInputContainment) {
        // Check that path is inside home directory
        Path relativePath = relativizePath(_homeDir, path);
        if (relativePath.isAbsolute()) // Has authority or begins with "/"
            throw new PermissionException("Not allowed to read outside the " + "WebMapReduce home directory ("
                    + _homeDir.toString() + "). Please specify a relative path.");
    }
}

From source file:es.tid.cosmos.platform.injection.server.HadoopFileSystemViewTest.java

License:Open Source License

/**
 * Verifies that an empty file name resolves to the user's home directory.
 *
 * @throws Exception
 */
@Test
public void testGetFileWithEmptyFilename() throws Exception {
    HadoopSshFile file = this.hadoopFileSystemView.getFile("");
    FileSystem hadoopFS = FileSystem.get(URI.create(this.conf.get("fs.default.name")),
            this.conf);/*, this.userName);*/
    String homePath = hadoopFS.getHomeDirectory().toString().replaceFirst(hadoopFS.getUri().toString(), "");
    assertEquals(homePath, file.getAbsolutePath());
    assertEquals(homePath.substring(homePath.lastIndexOf("/") + 1), file.getName());
}

From source file:es.tid.cosmos.platform.injection.server.HadoopFileSystemViewTest.java

License:Open Source License

/**
 * Verifies that the relative path "." is redirected to the user's home directory.
 *
 * @throws Exception
 */
@Test
public void testRedirectionToHomePath() throws Exception {
    FileSystem fs = FileSystem.get(URI.create(this.conf.get("fs.default.name")), this.conf);//, "test");
    String homePath = fs.getHomeDirectory().toString().replaceFirst(fs.getUri().toString(), "");
    HadoopSshFile init = this.hadoopFileSystemView.getFile(new HadoopSshFile(".", "test", fs), ".");
    assertEquals(homePath, init.getAbsolutePath());
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Constructs a {@link SimpleHadoopFilesystemConfigStore} using a given {@link FileSystem} and a {@link URI} that points to the
 * physical location of the store root.
 *
 * @param fs the {@link FileSystem} the {@link ConfigStore} is stored on.
 * @param physicalStoreRoot the fully qualified {@link URI} of the physical store root, the {@link URI#getScheme()} of the
 *                          {@link URI} should match the {@link FileSystem#getScheme()} of the given {@link FileSystem}.
 * @param logicalStoreRoot the fully qualified {@link URI} of the logical store root
 */
protected SimpleHadoopFilesystemConfigStore(FileSystem fs, URI physicalStoreRoot, URI logicalStoreRoot) {
    Preconditions.checkNotNull(fs, "fs cannot be null!");
    Preconditions.checkNotNull(physicalStoreRoot, "physicalStoreRoot cannot be null!");
    Preconditions.checkNotNull(logicalStoreRoot, "logicalStoreRoot cannot be null!");

    this.fs = fs;

    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getScheme()),
            "The physicalStoreRoot must have a valid scheme!");
    Preconditions.checkArgument(physicalStoreRoot.getScheme().equals(fs.getUri().getScheme()),
            "The scheme of the physicalStoreRoot and the filesystem must match!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getPath()),
            "The path of the physicalStoreRoot must be valid as it is the root of the store!");

    this.physicalStoreRoot = physicalStoreRoot;
    this.logicalStoreRoot = logicalStoreRoot;
    this.versions = CacheBuilder.newBuilder().build();
    this.storeMetadata = new SimpleHDFSStoreMetadata(fs,
            new Path(new Path(this.physicalStoreRoot), CONFIG_STORE_NAME));
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java

License:Apache License

/**
 * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory on HDFS. It does
 * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
 * contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It then assumes this {@link Path} is the root
 * directory.
 *
 * <p>
 *   If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
 *   not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
 *   the default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME} then a
 *   {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (getDefaultStoreURILazy() != null) {
            return getDefaultStoreURILazy();
        } else if (isAuthorityRequired()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
    }

    Path path = new Path(configKey.getPath());

    while (path != null) {
        try {
            // The absolute URI may point to a nonexistent path when
            // 1. it refers to a phantom node, or
            // 2. the URI did not specify the version.
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory() && fileStatus.getPath().getName()
                            .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }

        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStore.java

License:Apache License

/**
 * Constructs a {@link SimpleHDFSConfigStore} using a given {@link FileSystem} and a {@link URI} that points to the
 * physical location of the store root.
 *
 * @param fs the {@link FileSystem} the {@link ConfigStore} is stored on.
 * @param physicalStoreRoot the fully qualified {@link URI} of the physical store root, the {@link URI#getScheme()} of the
 *                          {@link URI} should match the {@link FileSystem#getScheme()} of the given {@link FileSystem}.
 * @param logicalStoreRoot the fully qualified {@link URI} of the logical store root
 */
protected SimpleHDFSConfigStore(FileSystem fs, URI physicalStoreRoot, URI logicalStoreRoot) {
    Preconditions.checkNotNull(fs, "fs cannot be null!");
    Preconditions.checkNotNull(physicalStoreRoot, "physicalStoreRoot cannot be null!");
    Preconditions.checkNotNull(logicalStoreRoot, "logicalStoreRoot cannot be null!");

    this.fs = fs;

    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getScheme()),
            "The physicalStoreRoot must have a valid scheme!");
    Preconditions.checkArgument(physicalStoreRoot.getScheme().equals(fs.getUri().getScheme()),
            "The scheme of the physicalStoreRoot and the filesystem must match!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getPath()),
            "The path of the physicalStoreRoot must be valid as it is the root of the store!");

    this.physicalStoreRoot = physicalStoreRoot;
    this.logicalStoreRoot = logicalStoreRoot;
    this.versions = CacheBuilder.newBuilder().build();
    this.storeMetadata = new SimpleHDFSStoreMetadata(fs,
            new Path(new Path(this.physicalStoreRoot), CONFIG_STORE_NAME));
}

From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java

License:Apache License

/**
 * This method determines the physical location of the {@link SimpleHDFSConfigStore} root directory on HDFS. It does
 * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
 * contains the folder {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. It then assumes this {@link Path} is the root
 * directory.
 *
 * <p>
 *   If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
 *   not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
 *   the default root dir does not contain the {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME} then a
 *   {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (!hasDefaultStoreURI()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
        return this.defaultStoreURI.get();
    }

    Path path = new Path(configKey.getPath());

    while (path != null) {
        try {
            // The absolute URI may point to a nonexistent path when
            // 1. it refers to a phantom node, or
            // 2. the URI did not specify the version.
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory()
                            && fileStatus.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }

        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}

From source file:gobblin.data.management.copy.CopySource.java

License:Apache License

/**
 * <ul>
 * Does the following:
 * <li>Instantiate a {@link DatasetsFinder}.
 * <li>Find all {@link Dataset} using {@link DatasetsFinder}.
 * <li>For each {@link CopyableDataset} get all {@link CopyEntity}s.
 * <li>Create a {@link WorkUnit} per {@link CopyEntity}.
 * </ul>
 *
 * <p>
 * In this implementation, one workunit is created for every {@link CopyEntity} found. But the extractor/converters
 * and writers are built to support multiple {@link CopyEntity}s per workunit
 * </p>
 *
 * @param state see {@link gobblin.configuration.SourceState}
 * @return Work units for copying files.
 */
@Override
public List<WorkUnit> getWorkunits(final SourceState state) {

    this.metricContext = Instrumented.getMetricContext(state, CopySource.class);

    try {

        DeprecationUtils.renameDeprecatedKeys(state,
                CopyConfiguration.MAX_COPY_PREFIX + "." + CopyResourcePool.ENTITIES_KEY,
                Lists.newArrayList(MAX_FILES_COPIED_KEY));

        final FileSystem sourceFs = getSourceFileSystem(state);
        final FileSystem targetFs = getTargetFileSystem(state);

        log.info("Identified source file system at {} and target file system at {}.", sourceFs.getUri(),
                targetFs.getUri());

        long maxSizePerBin = state.getPropAsLong(MAX_SIZE_MULTI_WORKUNITS, 0);
        long maxWorkUnitsPerMultiWorkUnit = state.getPropAsLong(MAX_WORK_UNITS_PER_BIN, 50);
        final long minWorkUnitWeight = Math.max(1, maxSizePerBin / maxWorkUnitsPerMultiWorkUnit);
        final Optional<CopyableFileWatermarkGenerator> watermarkGenerator = CopyableFileWatermarkHelper
                .getCopyableFileWatermarkGenerator(state);
        int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES,
                DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

        final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties())
                .build();

        DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils.instantiateDatasetFinder(
                state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY,
                new EventSubmitter.Builder(this.metricContext, CopyConfiguration.COPY_PREFIX).build(), state);

        IterableDatasetFinder<CopyableDatasetBase> iterableDatasetFinder = datasetFinder instanceof IterableDatasetFinder
                ? (IterableDatasetFinder<CopyableDatasetBase>) datasetFinder
                : new IterableDatasetFinderImpl<>(datasetFinder);

        Iterator<CopyableDatasetRequestor> requestorIteratorWithNulls = Iterators.transform(
                iterableDatasetFinder.getDatasetsIterator(),
                new CopyableDatasetRequestor.Factory(targetFs, copyConfiguration, log));
        Iterator<CopyableDatasetRequestor> requestorIterator = Iterators.filter(requestorIteratorWithNulls,
                Predicates.<CopyableDatasetRequestor>notNull());

        final SetMultimap<FileSet<CopyEntity>, WorkUnit> workUnitsMap = Multimaps
                .<FileSet<CopyEntity>, WorkUnit>synchronizedSetMultimap(
                        HashMultimap.<FileSet<CopyEntity>, WorkUnit>create());

        RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads);
        Iterator<FileSet<CopyEntity>> prioritizedFileSets = allocator.allocateRequests(requestorIterator,
                copyConfiguration.getMaxToCopy());

        Iterator<Callable<Void>> callableIterator = Iterators.transform(prioritizedFileSets,
                new Function<FileSet<CopyEntity>, Callable<Void>>() {
                    @Nullable
                    @Override
                    public Callable<Void> apply(FileSet<CopyEntity> input) {
                        return new FileSetWorkUnitGenerator((CopyableDatasetBase) input.getDataset(), input,
                                state, workUnitsMap, watermarkGenerator, minWorkUnitWeight);
                    }
                });

        try {
            List<Future<Void>> futures = new IteratorExecutor<>(callableIterator, maxThreads, ExecutorsUtils
                    .newDaemonThreadFactory(Optional.of(log), Optional.of("Copy-file-listing-pool-%d")))
                            .execute();

            for (Future<Void> future : futures) {
                try {
                    future.get();
                } catch (ExecutionException exc) {
                    log.error("Failed to get work units for dataset.", exc.getCause());
                }
            }
        } catch (InterruptedException ie) {
            log.error("Retrieval of work units was interrupted. Aborting.");
            return Lists.newArrayList();
        }

        log.info(String.format("Created %s workunits ", workUnitsMap.size()));

        copyConfiguration.getCopyContext().logCacheStatistics();

        if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
            log.info("Simulate mode enabled. Will not execute the copy.");
            for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
                log.info(String.format("Actions for dataset %s file set %s.",
                        entry.getKey().getDataset().datasetURN(), entry.getKey().getName()));
                for (WorkUnit workUnit : entry.getValue()) {
                    CopyEntity copyEntity = deserializeCopyEntity(workUnit);
                    log.info(copyEntity.explain());
                }
            }
            return Lists.newArrayList();
        }

        List<? extends WorkUnit> workUnits = new WorstFitDecreasingBinPacking(maxSizePerBin)
                .pack(Lists.newArrayList(workUnitsMap.values()), this.weighter);
        log.info(String.format(
                "Bin packed work units. Initial work units: %d, packed work units: %d, max weight per bin: %d, "
                        + "max work units per bin: %d.",
                workUnitsMap.size(), workUnits.size(), maxSizePerBin, maxWorkUnitsPerMultiWorkUnit));
        return ImmutableList.copyOf(workUnits);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:gobblin.data.management.copy.replication.ConfigBasedDatasetsTest.java

License:Apache License

@Test
public void testGetCopyableFiles() throws Exception {
    String sourceDir = getClass().getClassLoader().getResource("configBasedDatasetTest/src").getFile();
    String destinationDir = getClass().getClassLoader().getResource("configBasedDatasetTest/dest").getFile();
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URI local = localFs.getUri();
    long sourceWatermark = 100L;

    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");

    CopyConfiguration copyConfiguration = CopyConfiguration
            .builder(FileSystem.getLocal(new Configuration()), properties).publishDir(new Path(destinationDir))
            .preserve(PreserveAttributes.fromMnemonicString("ugp")).build();

    ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class);
    Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data");

    ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class);
    Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
    Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData);

    HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir));
    Mockito.when(copyFrom.getFsURI()).thenReturn(local);
    ComparableWatermark sw = new LongWatermark(sourceWatermark);
    Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw));
    Mockito.when(copyFrom.getFiles())
            .thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(sourceDir)));

    HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir));
    Mockito.when(copyTo.getFsURI()).thenReturn(local);
    Optional<ComparableWatermark> tmp = Optional.absent();
    Mockito.when(copyTo.getWatermark()).thenReturn(tmp);
    Mockito.when(copyTo.getFiles())
            .thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(destinationDir)));

    CopyRoute route = Mockito.mock(CopyRoute.class);
    Mockito.when(route.getCopyFrom()).thenReturn(copyFrom);
    Mockito.when(route.getCopyTo()).thenReturn(copyTo);

    ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route);

    Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(localFs, copyConfiguration);
    Assert.assertEquals(copyableFiles.size(), 6);

    Set<Path> paths = Sets.newHashSet(new Path("dir1/file2"), new Path("dir1/file1"), new Path("dir2/file1"),
            new Path("dir2/file3"));
    for (CopyEntity copyEntity : copyableFiles) {
        if (copyEntity instanceof CopyableFile) {
            CopyableFile file = (CopyableFile) copyEntity;
            Path originRelativePath = PathUtils.relativizePath(
                    PathUtils.getPathWithoutSchemeAndAuthority(file.getOrigin().getPath()),
                    PathUtils.getPathWithoutSchemeAndAuthority(new Path(sourceDir)));
            Path targetRelativePath = PathUtils.relativizePath(
                    PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination()),
                    PathUtils.getPathWithoutSchemeAndAuthority(new Path(destinationDir)));

            Assert.assertTrue(paths.contains(originRelativePath));
            Assert.assertTrue(paths.contains(targetRelativePath));
            Assert.assertEquals(originRelativePath, targetRelativePath);
        } else if (copyEntity instanceof PrePublishStep) {
            PrePublishStep pre = (PrePublishStep) copyEntity;
            Assert.assertTrue(pre.getStep() instanceof DeleteFileCommitStep);
            // need to delete this file
            Assert.assertTrue(pre.explain().indexOf("configBasedDatasetTest/dest/dir1/file1") > 0);
        } else if (copyEntity instanceof PostPublishStep) {
            PostPublishStep post = (PostPublishStep) copyEntity;
            Assert.assertTrue(post.getStep() instanceof WatermarkMetadataGenerationCommitStep);
            Assert.assertTrue(post.explain().indexOf("dest/_metadata") > 0
                    && post.explain().indexOf("" + sourceWatermark) > 0);
        } else {
            throw new Exception("Wrong type");
        }
    }
}