Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.getUri().

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
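
As a quick orientation before the full examples below, here is a minimal, self-contained sketch of calling getUri(). It resolves the default FileSystem from whatever Configuration is on the classpath and prints the identifying URI; the class name and printed labels are illustrative assumptions, not taken from any of the sources listed under Usage.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        // Resolve the default FileSystem (e.g. hdfs://namenode:8020 or file:///)
        // from the core-site.xml found on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getUri() identifies this FileSystem; the scheme and authority are
        // usually enough to distinguish HDFS from the local filesystem.
        URI uri = fs.getUri();
        System.out.println("FileSystem URI: " + uri);
        System.out.println("Scheme: " + uri.getScheme());
    }
}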

Usage

From source file:org.apache.giraph.debugger.utils.AsyncHDFSWriteService.java

License:Apache License

/**
 * Writes the given protobuf message to the given filesystem path in the
 * background.
 *
 * @param message
 *          The proto message to write.
 * @param fs
 *          The HDFS filesystem to write to.
 * @param fileName
 *          The HDFS path to write the message to.
 */
public static void writeToHDFS(final GeneratedMessage message, final FileSystem fs, final String fileName) {
    HDFS_ASYNC_WRITE_SERVICE.submit(new Runnable() {
        @Override
        public void run() {
            Path pt = new Path(fileName);
            try {
                LOG.info("Writing " + fileName + " at " + fs.getUri());
                OutputStream wrappedStream = fs.create(pt, true).getWrappedStream();
                message.writeTo(wrappedStream);
                wrappedStream.close();
                LOG.info("Done writing " + fileName);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });
}
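
A hedged sketch of a call site for the helper above; the protobuf message variable and the output path are assumptions made purely for illustration and do not come from the Giraph sources.

// `scenarioMessage` stands in for any com.google.protobuf.GeneratedMessage
// built elsewhere; the HDFS path is an arbitrary example.
FileSystem fs = FileSystem.get(new Configuration());
AsyncHDFSWriteService.writeToHDFS(scenarioMessage, fs, "/giraph-debug-traces/scenario.pb");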

From source file:org.apache.giraph.utils.DistributedCacheUtils.java

License:Apache License

/**
 * Copy a file to HDFS if it is local. If the path is already in HDFS, this
 * call does nothing.
 *
 * @param path path to file
 * @param conf Configuration
 * @return path to file on HDFS.
 */
public static Path copyToHdfs(Path path, Configuration conf) {
    if (path.toString().startsWith("hdfs://")) {
        // Already on HDFS
        return path;
    }

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new IllegalArgumentException("Failed to get HDFS FileSystem", e);
    }
    String name = getBaseName(path.toString()) + "-" + System.nanoTime();
    Path remotePath = new Path("/tmp/giraph", name);
    LOG.info("copyToHdfsIfNecessary: Copying " + path + " to " + remotePath + " on hdfs " + fs.getUri());
    try {
        fs.copyFromLocalFile(false, true, path, remotePath);
    } catch (IOException e) {
        throw new IllegalArgumentException("Failed to copy jython script from local path " + path
                + " to hdfs path " + remotePath + " on hdfs " + fs.getUri(), e);
    }
    return remotePath;
}
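
A hedged sketch of invoking the helper above; the local script path is an illustrative assumption, and the returned remote path follows the /tmp/giraph/&lt;basename&gt;-&lt;nanoTime&gt; pattern built inside the method.

// The local path is only an example; any path that does not start with hdfs:// is copied.
Configuration conf = new Configuration();
Path localScript = new Path("file:///tmp/my_jython_script.py");
Path remoteScript = DistributedCacheUtils.copyToHdfs(localScript, conf);
// remoteScript now points under /tmp/giraph/ on the default FileSystem.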

From source file:org.apache.giraph.yarn.GiraphYarnClient.java

License:Apache License

/**
 * Set delegation tokens for AM container
 * @param amContainer AM container
 * @throws IOException if the RM renewer principal or the filesystem delegation tokens cannot be obtained
 */
private void setToken(ContainerLaunchContext amContainer) throws IOException {
    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = giraphConf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }
        FileSystem fs = FileSystem.get(giraphConf);
        // For now, only getting tokens for the default file-system.
        final Token<?>[] tokens = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }
}

From source file:org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest.java

License:Apache License

public static ConvertibleHiveDataset createTestConvertibleDataset(Config config) throws URISyntaxException {
    Table table = getTestTable("db1", "tb1");
    FileSystem mockFs = Mockito.mock(FileSystem.class);
    when(mockFs.getUri()).thenReturn(new URI("test"));
    ConvertibleHiveDataset cd = new ConvertibleHiveDataset(mockFs, Mockito.mock(HiveMetastoreClientPool.class),
            new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties(), config);
    return cd;
}

From source file:org.apache.gobblin.data.management.copy.CopyableFile.java

License:Apache License

/**
 * Set file system based source and destination dataset for this {@link CopyableFile}
 *
 * @param originFs {@link FileSystem} where this {@link CopyableFile} origins
 * @param targetFs {@link FileSystem} where this {@link CopyableFile} is copied to
 */
public void setFsDatasets(FileSystem originFs, FileSystem targetFs) {
    /*
     * By default, the raw Gobblin dataset for CopyableFile lineage is its parent folder
     * if itself is not a folder
     */
    boolean isDir = origin.isDirectory();

    Path fullSourcePath = Path.getPathWithoutSchemeAndAuthority(origin.getPath());
    String sourceDatasetName = isDir ? fullSourcePath.toString() : fullSourcePath.getParent().toString();
    DatasetDescriptor sourceDataset = new DatasetDescriptor(originFs.getScheme(), sourceDatasetName);
    sourceDataset.addMetadata(DatasetConstants.FS_URI, originFs.getUri().toString());
    sourceData = sourceDataset;

    Path fullDestinationPath = Path.getPathWithoutSchemeAndAuthority(destination);
    String destinationDatasetName = isDir ? fullDestinationPath.toString()
            : fullDestinationPath.getParent().toString();
    DatasetDescriptor destinationDataset = new DatasetDescriptor(targetFs.getScheme(), destinationDatasetName);
    destinationDataset.addMetadata(DatasetConstants.FS_URI, targetFs.getUri().toString());
    destinationData = destinationDataset;
}

From source file:org.apache.gobblin.data.management.copy.CopyableFileTest.java

License:Apache License

@Test
public void testSetFsDatasets() throws URISyntaxException {
    FileSystem originFs = mock(FileSystem.class);
    String originFsUri = "hdfs://source.company.biz:2000";
    String originPath = "/data/databases/source/profile";
    when(originFs.getUri()).thenReturn(new URI(originFsUri));
    when(originFs.getScheme()).thenReturn("hdfs");

    FileSystem targetFs = mock(FileSystem.class);
    String targetFsUri = "file:///";
    String destinationPath = "/data/databases/destination/profile";
    when(targetFs.getUri()).thenReturn(new URI(targetFsUri));
    when(targetFs.getScheme()).thenReturn("file");

    // Test when source file is not a directory
    FileStatus origin = new FileStatus(0l, false, 0, 0l, 0l, new Path(originPath));
    CopyableFile copyableFile = new CopyableFile(origin, new Path(destinationPath), null, null, null,
            PreserveAttributes.fromMnemonicString(""), "", 0, 0, Maps.<String, String>newHashMap(), "", null);
    copyableFile.setFsDatasets(originFs, targetFs);
    DatasetDescriptor source = (DatasetDescriptor) copyableFile.getSourceData();
    Assert.assertEquals(source.getName(), "/data/databases/source");
    Assert.assertEquals(source.getPlatform(), "hdfs");
    Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);
    DatasetDescriptor destination = (DatasetDescriptor) copyableFile.getDestinationData();
    Assert.assertEquals(destination.getName(), "/data/databases/destination");
    Assert.assertEquals(destination.getPlatform(), "file");
    Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);

    // Test when source file is a directory
    originPath = originFsUri + originPath;
    destinationPath = targetFsUri + destinationPath;
    origin = new FileStatus(0l, true, 0, 0l, 0l, new Path(originPath));
    copyableFile = new CopyableFile(origin, new Path(destinationPath), null, null, null,
            PreserveAttributes.fromMnemonicString(""), "", 0, 0, Maps.<String, String>newHashMap(), "", null);
    copyableFile.setFsDatasets(originFs, targetFs);
    source = (DatasetDescriptor) copyableFile.getSourceData();
    Assert.assertEquals(source.getName(), "/data/databases/source/profile");
    Assert.assertEquals(source.getPlatform(), "hdfs");
    Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);
    destination = (DatasetDescriptor) copyableFile.getDestinationData();
    Assert.assertEquals(destination.getName(), "/data/databases/destination/profile");
    Assert.assertEquals(destination.getPlatform(), "file");
    Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);
}

From source file:org.apache.gobblin.data.management.copy.CopySource.java

License:Apache License

/**
 * Does the following:
 * <ul>
 * <li>Instantiate a {@link DatasetsFinder}.
 * <li>Find all {@link Dataset}s using the {@link DatasetsFinder}.
 * <li>For each {@link CopyableDataset}, get all {@link CopyEntity}s.
 * <li>Create a {@link WorkUnit} per {@link CopyEntity}.
 * </ul>
 *
 * <p>
 * In this implementation, one workunit is created for every {@link CopyEntity} found, but the extractors,
 * converters, and writers are built to support multiple {@link CopyEntity}s per workunit.
 * </p>
 *
 * @param state see {@link org.apache.gobblin.configuration.SourceState}
 * @return Work units for copying files.
 */
@Override
public List<WorkUnit> getWorkunits(final SourceState state) {

    this.metricContext = Instrumented.getMetricContext(state, CopySource.class);
    this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

    try {

        DeprecationUtils.renameDeprecatedKeys(state,
                CopyConfiguration.MAX_COPY_PREFIX + "." + CopyResourcePool.ENTITIES_KEY,
                Lists.newArrayList(MAX_FILES_COPIED_KEY));

        final FileSystem sourceFs = HadoopUtils.getSourceFileSystem(state);
        final FileSystem targetFs = HadoopUtils.getWriterFileSystem(state, 1, 0);
        state.setProp(SlaEventKeys.SOURCE_URI, sourceFs.getUri());
        state.setProp(SlaEventKeys.DESTINATION_URI, targetFs.getUri());

        log.info("Identified source file system at {} and target file system at {}.", sourceFs.getUri(),
                targetFs.getUri());

        long maxSizePerBin = state.getPropAsLong(MAX_SIZE_MULTI_WORKUNITS, 0);
        long maxWorkUnitsPerMultiWorkUnit = state.getPropAsLong(MAX_WORK_UNITS_PER_BIN, 50);
        final long minWorkUnitWeight = Math.max(1, maxSizePerBin / maxWorkUnitsPerMultiWorkUnit);
        final Optional<CopyableFileWatermarkGenerator> watermarkGenerator = CopyableFileWatermarkHelper
                .getCopyableFileWatermarkGenerator(state);
        int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES,
                DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

        final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties())
                .build();

        this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, CopyConfiguration.COPY_PREFIX)
                .build();
        DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils.instantiateDatasetFinder(
                state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY, this.eventSubmitter, state);

        IterableDatasetFinder<CopyableDatasetBase> iterableDatasetFinder = datasetFinder instanceof IterableDatasetFinder
                ? (IterableDatasetFinder<CopyableDatasetBase>) datasetFinder
                : new IterableDatasetFinderImpl<>(datasetFinder);

        Iterator<CopyableDatasetRequestor> requestorIteratorWithNulls = Iterators.transform(
                iterableDatasetFinder.getDatasetsIterator(),
                new CopyableDatasetRequestor.Factory(targetFs, copyConfiguration, log));
        Iterator<CopyableDatasetRequestor> requestorIterator = Iterators.filter(requestorIteratorWithNulls,
                Predicates.<CopyableDatasetRequestor>notNull());

        final SetMultimap<FileSet<CopyEntity>, WorkUnit> workUnitsMap = Multimaps
                .<FileSet<CopyEntity>, WorkUnit>synchronizedSetMultimap(
                        HashMultimap.<FileSet<CopyEntity>, WorkUnit>create());

        RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads);
        Iterator<FileSet<CopyEntity>> prioritizedFileSets = allocator.allocateRequests(requestorIterator,
                copyConfiguration.getMaxToCopy());

        //Submit alertable events for unfulfilled requests
        submitUnfulfilledRequestEvents(allocator);

        String filesetWuGeneratorAlias = state.getProp(ConfigurationKeys.COPY_SOURCE_FILESET_WU_GENERATOR_CLASS,
                FileSetWorkUnitGenerator.class.getName());
        Iterator<Callable<Void>> callableIterator = Iterators.transform(prioritizedFileSets,
                new Function<FileSet<CopyEntity>, Callable<Void>>() {
                    @Nullable
                    @Override
                    public Callable<Void> apply(FileSet<CopyEntity> input) {
                        try {
                            return GobblinConstructorUtils.<FileSetWorkUnitGenerator>invokeLongestConstructor(
                                    new ClassAliasResolver(FileSetWorkUnitGenerator.class).resolveClass(
                                            filesetWuGeneratorAlias),
                                    input.getDataset(), input, state, targetFs, workUnitsMap,
                                    watermarkGenerator, minWorkUnitWeight, lineageInfo);
                        } catch (Exception e) {
                            throw new RuntimeException("Cannot create workunits generator", e);
                        }
                    }
                });

        try {
            List<Future<Void>> futures = new IteratorExecutor<>(callableIterator, maxThreads, ExecutorsUtils
                    .newDaemonThreadFactory(Optional.of(log), Optional.of("Copy-file-listing-pool-%d")))
                            .execute();

            for (Future<Void> future : futures) {
                try {
                    future.get();
                } catch (ExecutionException exc) {
                    log.error("Failed to get work units for dataset.", exc.getCause());
                }
            }
        } catch (InterruptedException ie) {
            log.error("Retrieval of work units was interrupted. Aborting.");
            return Lists.newArrayList();
        }

        log.info(String.format("Created %s workunits ", workUnitsMap.size()));

        copyConfiguration.getCopyContext().logCacheStatistics();

        if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
            log.info("Simulate mode enabled. Will not execute the copy.");
            for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
                log.info(String.format("Actions for dataset %s file set %s.",
                        entry.getKey().getDataset().datasetURN(), entry.getKey().getName()));
                for (WorkUnit workUnit : entry.getValue()) {
                    try {
                        CopyEntity copyEntity = deserializeCopyEntity(workUnit);
                        log.info(copyEntity.explain());
                    } catch (Exception e) {
                        log.info("Cannot deserialize CopyEntity from wu : {}", workUnit.toString());
                    }
                }
            }
            return Lists.newArrayList();
        }

        List<? extends WorkUnit> workUnits = new WorstFitDecreasingBinPacking(maxSizePerBin)
                .pack(Lists.newArrayList(workUnitsMap.values()), this.weighter);
        log.info(String.format(
                "Bin packed work units. Initial work units: %d, packed work units: %d, max weight per bin: %d, "
                        + "max work units per bin: %d.",
                workUnitsMap.size(), workUnits.size(), maxSizePerBin, maxWorkUnitsPerMultiWorkUnit));
        return ImmutableList.copyOf(workUnits);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedDatasetTest.java

License:Apache License

public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir,
        long sourceWatermark, boolean isFilterEnabled) throws Exception {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URI local = localFs.getUri();

    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
    PathFilter pathFilter = DatasetUtils.instantiatePathFilter(properties);
    boolean applyFilterToDirectories = false;
    if (isFilterEnabled) {
        properties.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class",
                "org.apache.gobblin.util.filters.HiddenFilter");
        properties.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");

        pathFilter = DatasetUtils.instantiatePathFilter(properties);
        applyFilterToDirectories = Boolean
                .parseBoolean(properties.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));
    }

    CopyConfiguration copyConfiguration = CopyConfiguration
            .builder(FileSystem.getLocal(new Configuration()), properties).publishDir(new Path(destinationDir))
            .preserve(PreserveAttributes.fromMnemonicString("ugp")).build();

    ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class);
    Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data");

    ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class);
    Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
    Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData);
    Mockito.when(mockRC.getVersionStrategyFromConfigStore())
            .thenReturn(Optional.of(DataFileVersionStrategy.DEFAULT_DATA_FILE_VERSION_STRATEGY));
    Mockito.when(mockRC.getEnforceFileSizeMatchFromConfigStore()).thenReturn(Optional.absent());
    HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir));
    Mockito.when(copyFrom.getFsURI()).thenReturn(local);
    ComparableWatermark sw = new LongWatermark(sourceWatermark);
    Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw));
    Mockito.when(copyFrom.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs,
            new Path(sourceDir), pathFilter, applyFilterToDirectories));

    HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir));
    Mockito.when(copyTo.getFsURI()).thenReturn(local);
    Optional<ComparableWatermark> tmp = Optional.absent();
    Mockito.when(copyTo.getWatermark()).thenReturn(tmp);
    Mockito.when(copyTo.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs,
            new Path(destinationDir), pathFilter, applyFilterToDirectories));

    CopyRoute route = Mockito.mock(CopyRoute.class);
    Mockito.when(route.getCopyFrom()).thenReturn(copyFrom);
    Mockito.when(route.getCopyTo()).thenReturn(copyTo);

    ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route);
    Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(localFs, copyConfiguration);
    return copyableFiles;
}

From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedMultiDatasets.java

License:Apache License

public ConfigBasedMultiDatasets(Config c, Properties props, Optional<List<String>> blacklistPatterns) {
    this.props = props;
    blacklist = patternListInitHelper(blacklistPatterns);

    try {
        FileSystem executionCluster = FileSystem.get(new Configuration());
        URI executionClusterURI = executionCluster.getUri();

        ReplicationConfiguration rc = ReplicationConfiguration.buildFromConfig(c);

        // push mode
        if (this.props.containsKey(REPLICATION_PUSH_MODE)
                && Boolean.parseBoolean(this.props.getProperty(REPLICATION_PUSH_MODE))) {
            generateDatasetInPushMode(rc, executionClusterURI);
        }
        // default pull mode
        else {
            generateDatasetInPullMode(rc, executionClusterURI);
        }
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        log.error("Can not create Replication Configuration from raw config "
                + c.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)),
                e);
    } catch (IOException ioe) {
        log.error("Can not decide current execution cluster ", ioe);

    }
}

From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java

License:Apache License

/**
 * @param state {@link State} containing properties for a job.
 * @param targetFs destination {@link FileSystem} where file is to be copied
 * @return whether to allow for splitting of work units based on the filesystem, state, converter/writer config.
 */
public static boolean allowSplit(State state, FileSystem targetFs) {
    // Don't allow distcp jobs that use decrypt/ungzip converters or tararchive/encrypt writers to split work units
    Collection<String> converterClassNames = Collections.emptyList();
    if (state.contains(ConfigurationKeys.CONVERTER_CLASSES_KEY)) {
        converterClassNames = state.getPropAsList(ConfigurationKeys.CONVERTER_CLASSES_KEY);
    }

    return state.getPropAsBoolean(SPLIT_ENABLED, false)
            && KNOWN_SCHEMES_SUPPORTING_CONCAT.contains(targetFs.getUri().getScheme())
            && state.getProp(ConfigurationKeys.WRITER_BUILDER_CLASS, "")
                    .equals(FileAwareInputStreamDataWriterBuilder.class.getName())
            && converterClassNames.stream().noneMatch(s -> !s.equals(IdentityConverter.class.getName()));
}