List of usage examples for org.apache.hadoop.fs.FileSystem#getUri()
public abstract URI getUri();
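getUri() returns the URI whose scheme and authority identify a FileSystem instance (for example "file:///" or "hdfs://namenode:8020"), which is why the snippets below use it mostly for logging and for recording which cluster a path belongs to. Before the real-world examples, here is a minimal sketch of the basic call, assuming only a default Hadoop Configuration is available on the classpath; the class name GetUriExample is illustrative, not part of any of the projects quoted below.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
  public static void main(String[] args) throws Exception {
    // Resolve the default FileSystem from the Hadoop configuration (fs.defaultFS);
    // depending on the configuration this may be the local or an HDFS filesystem.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // getUri() returns the scheme and authority identifying this FileSystem,
    // e.g. "file:///" or "hdfs://namenode:8020".
    URI uri = fs.getUri();
    System.out.println("Default filesystem URI: " + uri);

    // The same URI can be passed back to FileSystem.get to obtain an equivalent instance.
    FileSystem same = FileSystem.get(uri, conf);
    System.out.println("Scheme: " + same.getUri().getScheme());
  }
}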
From source file:org.apache.giraph.debugger.utils.AsyncHDFSWriteService.java
License:Apache License
/**
 * Writes given protobuf message to the given filesystem path in the
 * background.
 *
 * @param message
 *          The proto message to write.
 * @param fs
 *          The HDFS filesystem to write to.
 * @param fileName
 *          The HDFS path to write the message to.
 */
public static void writeToHDFS(final GeneratedMessage message, final FileSystem fs, final String fileName) {
  HDFS_ASYNC_WRITE_SERVICE.submit(new Runnable() {
    @Override
    public void run() {
      Path pt = new Path(fileName);
      try {
        LOG.info("Writing " + fileName + " at " + fs.getUri());
        OutputStream wrappedStream = fs.create(pt, true).getWrappedStream();
        message.writeTo(wrappedStream);
        wrappedStream.close();
        LOG.info("Done writing " + fileName);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  });
}
From source file:org.apache.giraph.utils.DistributedCacheUtils.java
License:Apache License
/**
 * Copy a file to HDFS if it is local. If the path is already in HDFS, this
 * call does nothing.
 *
 * @param path path to file
 * @param conf Configuration
 * @return path to file on HDFS.
 */
public static Path copyToHdfs(Path path, Configuration conf) {
  if (path.toString().startsWith("hdfs://")) {
    // Already on HDFS
    return path;
  }
  FileSystem fs = null;
  try {
    fs = FileSystem.get(conf);
  } catch (IOException e) {
    throw new IllegalArgumentException("Failed to get HDFS FileSystem", e);
  }
  String name = getBaseName(path.toString()) + "-" + System.nanoTime();
  Path remotePath = new Path("/tmp/giraph", name);
  LOG.info("copyToHdfsIfNecessary: Copying " + path + " to " + remotePath + " on hdfs " + fs.getUri());
  try {
    fs.copyFromLocalFile(false, true, path, remotePath);
  } catch (IOException e) {
    throw new IllegalArgumentException("Failed to copy jython script from local path " + path
        + " to hdfs path " + remotePath + " on hdfs " + fs.getUri(), e);
  }
  return remotePath;
}
From source file:org.apache.giraph.yarn.GiraphYarnClient.java
License:Apache License
/**
 * Set delegation tokens for AM container
 * @param amContainer AM container
 */
private void setToken(ContainerLaunchContext amContainer) throws IOException {
  // Setup security tokens
  if (UserGroupInformation.isSecurityEnabled()) {
    Credentials credentials = new Credentials();
    String tokenRenewer = giraphConf.get(YarnConfiguration.RM_PRINCIPAL);
    if (tokenRenewer == null || tokenRenewer.length() == 0) {
      throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
    }
    FileSystem fs = FileSystem.get(giraphConf);
    // For now, only getting tokens for the default file-system.
    final Token<?>[] tokens = fs.addDelegationTokens(tokenRenewer, credentials);
    if (tokens != null) {
      for (Token<?> token : tokens) {
        LOG.info("Got dt for " + fs.getUri() + "; " + token);
      }
    }
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    amContainer.setTokens(fsTokens);
  }
}
From source file:org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest.java
License:Apache License
public static ConvertibleHiveDataset createTestConvertibleDataset(Config config) throws URISyntaxException {
  Table table = getTestTable("db1", "tb1");
  FileSystem mockFs = Mockito.mock(FileSystem.class);
  when(mockFs.getUri()).thenReturn(new URI("test"));
  ConvertibleHiveDataset cd = new ConvertibleHiveDataset(mockFs, Mockito.mock(HiveMetastoreClientPool.class),
      new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties(), config);
  return cd;
}
From source file:org.apache.gobblin.data.management.copy.CopyableFile.java
License:Apache License
/**
 * Set file system based source and destination dataset for this {@link CopyableFile}
 *
 * @param originFs {@link FileSystem} where this {@link CopyableFile} origins
 * @param targetFs {@link FileSystem} where this {@link CopyableFile} is copied to
 */
public void setFsDatasets(FileSystem originFs, FileSystem targetFs) {
  /*
   * By default, the raw Gobblin dataset for CopyableFile lineage is its parent folder
   * if itself is not a folder
   */
  boolean isDir = origin.isDirectory();

  Path fullSourcePath = Path.getPathWithoutSchemeAndAuthority(origin.getPath());
  String sourceDatasetName = isDir ? fullSourcePath.toString() : fullSourcePath.getParent().toString();
  DatasetDescriptor sourceDataset = new DatasetDescriptor(originFs.getScheme(), sourceDatasetName);
  sourceDataset.addMetadata(DatasetConstants.FS_URI, originFs.getUri().toString());
  sourceData = sourceDataset;

  Path fullDestinationPath = Path.getPathWithoutSchemeAndAuthority(destination);
  String destinationDatasetName = isDir ? fullDestinationPath.toString()
      : fullDestinationPath.getParent().toString();
  DatasetDescriptor destinationDataset = new DatasetDescriptor(targetFs.getScheme(), destinationDatasetName);
  destinationDataset.addMetadata(DatasetConstants.FS_URI, targetFs.getUri().toString());
  destinationData = destinationDataset;
}
From source file:org.apache.gobblin.data.management.copy.CopyableFileTest.java
License:Apache License
@Test
public void testSetFsDatasets() throws URISyntaxException {
  FileSystem originFs = mock(FileSystem.class);
  String originFsUri = "hdfs://source.company.biz:2000";
  String originPath = "/data/databases/source/profile";
  when(originFs.getUri()).thenReturn(new URI(originFsUri));
  when(originFs.getScheme()).thenReturn("hdfs");

  FileSystem targetFs = mock(FileSystem.class);
  String targetFsUri = "file:///";
  String destinationPath = "/data/databases/destination/profile";
  when(targetFs.getUri()).thenReturn(new URI(targetFsUri));
  when(targetFs.getScheme()).thenReturn("file");

  // Test when source file is not a directory
  FileStatus origin = new FileStatus(0l, false, 0, 0l, 0l, new Path(originPath));
  CopyableFile copyableFile = new CopyableFile(origin, new Path(destinationPath), null, null, null,
      PreserveAttributes.fromMnemonicString(""), "", 0, 0, Maps.<String, String>newHashMap(), "", null);
  copyableFile.setFsDatasets(originFs, targetFs);

  DatasetDescriptor source = (DatasetDescriptor) copyableFile.getSourceData();
  Assert.assertEquals(source.getName(), "/data/databases/source");
  Assert.assertEquals(source.getPlatform(), "hdfs");
  Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);

  DatasetDescriptor destination = (DatasetDescriptor) copyableFile.getDestinationData();
  Assert.assertEquals(destination.getName(), "/data/databases/destination");
  Assert.assertEquals(destination.getPlatform(), "file");
  Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);

  // Test when source file is a directory
  originPath = originFsUri + originPath;
  destinationPath = targetFsUri + destinationPath;
  origin = new FileStatus(0l, true, 0, 0l, 0l, new Path(originPath));
  copyableFile = new CopyableFile(origin, new Path(destinationPath), null, null, null,
      PreserveAttributes.fromMnemonicString(""), "", 0, 0, Maps.<String, String>newHashMap(), "", null);
  copyableFile.setFsDatasets(originFs, targetFs);

  source = (DatasetDescriptor) copyableFile.getSourceData();
  Assert.assertEquals(source.getName(), "/data/databases/source/profile");
  Assert.assertEquals(source.getPlatform(), "hdfs");
  Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);

  destination = (DatasetDescriptor) copyableFile.getDestinationData();
  Assert.assertEquals(destination.getName(), "/data/databases/destination/profile");
  Assert.assertEquals(destination.getPlatform(), "file");
  Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);
}
From source file:org.apache.gobblin.data.management.copy.CopySource.java
License:Apache License
/**
 * <ul>
 * Does the following:
 * <li>Instantiate a {@link DatasetsFinder}.
 * <li>Find all {@link Dataset} using {@link DatasetsFinder}.
 * <li>For each {@link CopyableDataset} get all {@link CopyEntity}s.
 * <li>Create a {@link WorkUnit} per {@link CopyEntity}.
 * </ul>
 *
 * <p>
 * In this implementation, one workunit is created for every {@link CopyEntity} found. But the extractor/converters
 * and writers are built to support multiple {@link CopyEntity}s per workunit
 * </p>
 *
 * @param state see {@link org.apache.gobblin.configuration.SourceState}
 * @return Work units for copying files.
 */
@Override
public List<WorkUnit> getWorkunits(final SourceState state) {
  this.metricContext = Instrumented.getMetricContext(state, CopySource.class);
  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

  try {
    DeprecationUtils.renameDeprecatedKeys(state,
        CopyConfiguration.MAX_COPY_PREFIX + "." + CopyResourcePool.ENTITIES_KEY,
        Lists.newArrayList(MAX_FILES_COPIED_KEY));

    final FileSystem sourceFs = HadoopUtils.getSourceFileSystem(state);
    final FileSystem targetFs = HadoopUtils.getWriterFileSystem(state, 1, 0);
    state.setProp(SlaEventKeys.SOURCE_URI, sourceFs.getUri());
    state.setProp(SlaEventKeys.DESTINATION_URI, targetFs.getUri());
    log.info("Identified source file system at {} and target file system at {}.", sourceFs.getUri(),
        targetFs.getUri());

    long maxSizePerBin = state.getPropAsLong(MAX_SIZE_MULTI_WORKUNITS, 0);
    long maxWorkUnitsPerMultiWorkUnit = state.getPropAsLong(MAX_WORK_UNITS_PER_BIN, 50);
    final long minWorkUnitWeight = Math.max(1, maxSizePerBin / maxWorkUnitsPerMultiWorkUnit);
    final Optional<CopyableFileWatermarkGenerator> watermarkGenerator =
        CopyableFileWatermarkHelper.getCopyableFileWatermarkGenerator(state);
    int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES, DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

    final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build();

    this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, CopyConfiguration.COPY_PREFIX).build();

    DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils.instantiateDatasetFinder(
        state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY, this.eventSubmitter, state);

    IterableDatasetFinder<CopyableDatasetBase> iterableDatasetFinder =
        datasetFinder instanceof IterableDatasetFinder
            ? (IterableDatasetFinder<CopyableDatasetBase>) datasetFinder
            : new IterableDatasetFinderImpl<>(datasetFinder);

    Iterator<CopyableDatasetRequestor> requestorIteratorWithNulls = Iterators.transform(
        iterableDatasetFinder.getDatasetsIterator(),
        new CopyableDatasetRequestor.Factory(targetFs, copyConfiguration, log));
    Iterator<CopyableDatasetRequestor> requestorIterator =
        Iterators.filter(requestorIteratorWithNulls, Predicates.<CopyableDatasetRequestor>notNull());

    final SetMultimap<FileSet<CopyEntity>, WorkUnit> workUnitsMap =
        Multimaps.<FileSet<CopyEntity>, WorkUnit>synchronizedSetMultimap(
            HashMultimap.<FileSet<CopyEntity>, WorkUnit>create());

    RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads);
    Iterator<FileSet<CopyEntity>> prioritizedFileSets =
        allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy());

    // Submit alertable events for unfulfilled requests
    submitUnfulfilledRequestEvents(allocator);

    String filesetWuGeneratorAlias = state.getProp(ConfigurationKeys.COPY_SOURCE_FILESET_WU_GENERATOR_CLASS,
        FileSetWorkUnitGenerator.class.getName());
    Iterator<Callable<Void>> callableIterator =
        Iterators.transform(prioritizedFileSets, new Function<FileSet<CopyEntity>, Callable<Void>>() {
          @Nullable
          @Override
          public Callable<Void> apply(FileSet<CopyEntity> input) {
            try {
              return GobblinConstructorUtils.<FileSetWorkUnitGenerator>invokeLongestConstructor(
                  new ClassAliasResolver(FileSetWorkUnitGenerator.class).resolveClass(filesetWuGeneratorAlias),
                  input.getDataset(), input, state, targetFs, workUnitsMap, watermarkGenerator,
                  minWorkUnitWeight, lineageInfo);
            } catch (Exception e) {
              throw new RuntimeException("Cannot create workunits generator", e);
            }
          }
        });

    try {
      List<Future<Void>> futures = new IteratorExecutor<>(callableIterator, maxThreads,
          ExecutorsUtils.newDaemonThreadFactory(Optional.of(log), Optional.of("Copy-file-listing-pool-%d")))
          .execute();

      for (Future<Void> future : futures) {
        try {
          future.get();
        } catch (ExecutionException exc) {
          log.error("Failed to get work units for dataset.", exc.getCause());
        }
      }
    } catch (InterruptedException ie) {
      log.error("Retrieval of work units was interrupted. Aborting.");
      return Lists.newArrayList();
    }

    log.info(String.format("Created %s workunits ", workUnitsMap.size()));

    copyConfiguration.getCopyContext().logCacheStatistics();

    if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
      log.info("Simulate mode enabled. Will not execute the copy.");
      for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
        log.info(String.format("Actions for dataset %s file set %s.",
            entry.getKey().getDataset().datasetURN(), entry.getKey().getName()));
        for (WorkUnit workUnit : entry.getValue()) {
          try {
            CopyEntity copyEntity = deserializeCopyEntity(workUnit);
            log.info(copyEntity.explain());
          } catch (Exception e) {
            log.info("Cannot deserialize CopyEntity from wu : {}", workUnit.toString());
          }
        }
      }
      return Lists.newArrayList();
    }

    List<? extends WorkUnit> workUnits = new WorstFitDecreasingBinPacking(maxSizePerBin)
        .pack(Lists.newArrayList(workUnitsMap.values()), this.weighter);
    log.info(String.format(
        "Bin packed work units. Initial work units: %d, packed work units: %d, max weight per bin: %d, "
            + "max work units per bin: %d.",
        workUnitsMap.size(), workUnits.size(), maxSizePerBin, maxWorkUnitsPerMultiWorkUnit));
    return ImmutableList.copyOf(workUnits);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedDatasetTest.java
License:Apache License
public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir,
    long sourceWatermark, boolean isFilterEnabled) throws Exception {
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  URI local = localFs.getUri();

  Properties properties = new Properties();
  properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
  PathFilter pathFilter = DatasetUtils.instantiatePathFilter(properties);
  boolean applyFilterToDirectories = false;
  if (isFilterEnabled) {
    properties.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class",
        "org.apache.gobblin.util.filters.HiddenFilter");
    properties.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");

    pathFilter = DatasetUtils.instantiatePathFilter(properties);
    applyFilterToDirectories =
        Boolean.parseBoolean(properties.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));
  }

  CopyConfiguration copyConfiguration = CopyConfiguration
      .builder(FileSystem.getLocal(new Configuration()), properties)
      .publishDir(new Path(destinationDir))
      .preserve(PreserveAttributes.fromMnemonicString("ugp")).build();

  ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class);
  Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data");

  ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class);
  Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
  Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData);
  Mockito.when(mockRC.getVersionStrategyFromConfigStore())
      .thenReturn(Optional.of(DataFileVersionStrategy.DEFAULT_DATA_FILE_VERSION_STRATEGY));
  Mockito.when(mockRC.getEnforceFileSizeMatchFromConfigStore()).thenReturn(Optional.absent());

  HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
  Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir));
  Mockito.when(copyFrom.getFsURI()).thenReturn(local);
  ComparableWatermark sw = new LongWatermark(sourceWatermark);
  Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw));
  Mockito.when(copyFrom.getFiles()).thenReturn(
      FileListUtils.listFilesRecursively(localFs, new Path(sourceDir), pathFilter, applyFilterToDirectories));

  HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
  Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir));
  Mockito.when(copyTo.getFsURI()).thenReturn(local);
  Optional<ComparableWatermark> tmp = Optional.absent();
  Mockito.when(copyTo.getWatermark()).thenReturn(tmp);
  Mockito.when(copyTo.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs,
      new Path(destinationDir), pathFilter, applyFilterToDirectories));

  CopyRoute route = Mockito.mock(CopyRoute.class);
  Mockito.when(route.getCopyFrom()).thenReturn(copyFrom);
  Mockito.when(route.getCopyTo()).thenReturn(copyTo);

  ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route);

  Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(localFs, copyConfiguration);
  return copyableFiles;
}
From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedMultiDatasets.java
License:Apache License
public ConfigBasedMultiDatasets(Config c, Properties props, Optional<List<String>> blacklistPatterns) {
  this.props = props;
  blacklist = patternListInitHelper(blacklistPatterns);

  try {
    FileSystem executionCluster = FileSystem.get(new Configuration());
    URI executionClusterURI = executionCluster.getUri();

    ReplicationConfiguration rc = ReplicationConfiguration.buildFromConfig(c);

    // push mode
    if (this.props.containsKey(REPLICATION_PUSH_MODE)
        && Boolean.parseBoolean(this.props.getProperty(REPLICATION_PUSH_MODE))) {
      generateDatasetInPushMode(rc, executionClusterURI);
    }
    // default pull mode
    else {
      generateDatasetInPullMode(rc, executionClusterURI);
    }
  } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
    log.error("Can not create Replication Configuration from raw config "
        + c.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)), e);
  } catch (IOException ioe) {
    log.error("Can not decide current execution cluster ", ioe);
  }
}
From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java
License:Apache License
/**
 * @param state {@link State} containing properties for a job.
 * @param targetFs destination {@link FileSystem} where file is to be copied
 * @return whether to allow for splitting of work units based on the filesystem, state, converter/writer config.
 */
public static boolean allowSplit(State state, FileSystem targetFs) {
  // Don't allow distcp jobs that use decrypt/ungzip converters or tararchive/encrypt writers to split work units
  Collection<String> converterClassNames = Collections.emptyList();
  if (state.contains(ConfigurationKeys.CONVERTER_CLASSES_KEY)) {
    converterClassNames = state.getPropAsList(ConfigurationKeys.CONVERTER_CLASSES_KEY);
  }
  return state.getPropAsBoolean(SPLIT_ENABLED, false)
      && KNOWN_SCHEMES_SUPPORTING_CONCAT.contains(targetFs.getUri().getScheme())
      && state.getProp(ConfigurationKeys.WRITER_BUILDER_CLASS, "")
          .equals(FileAwareInputStreamDataWriterBuilder.class.getName())
      && converterClassNames.stream().noneMatch(s -> !s.equals(IdentityConverter.class.getName()));
}