List of usage examples for org.apache.hadoop.fs.FileSystem.getUri()
public abstract URI getUri();
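For orientation before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) of what getUri() returns and how it is commonly used. The configuration and the path /tmp/example are purely illustrative.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Obtain the default FileSystem for this configuration
        // (the local FS unless fs.defaultFS points at a cluster).
        FileSystem fs = FileSystem.get(conf);

        // getUri() returns the scheme and authority identifying this FileSystem,
        // e.g. "file:///" for the local FS or "hdfs://namenode:8020" for HDFS.
        URI uri = fs.getUri();
        System.out.println("FileSystem URI: " + uri);

        // A pattern that recurs in the examples below: prepend the URI to a path
        // string to build a fully qualified path on that FileSystem.
        Path qualified = new Path(uri.toString() + "/tmp/example");
        System.out.println("Qualified path: " + qualified);
    }
}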
From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java
License:Apache License
public void testArchiving() throws Exception {
    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true); // nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);

    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);

    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);
    System.out.println("done!");
}
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
private void checkPath(FileSystem fs, Path path) throws PermissionException, NotFoundException, IOException {
    if (_disallowLocalInput) {
        // If we update to Hadoop 1.0, we should use the canonical URI, which is definitely
        // unique to each file system. However, the normal one should be, too.
        if (fs.getUri().equals(FileSystem.getLocal(new Configuration()).getUri())) {
            throw new PermissionException("Not allowed to read from the local file system.");
        }
    }

    if (!fs.exists(path))
        throw new NotFoundException("Input path does not exist: " + path.toString());

    if (_enforceInputContainment) {
        // Check that path is inside home directory
        Path relativePath = relativizePath(_homeDir, path);
        if (relativePath.isAbsolute()) // Has authority or begins with "/"
            throw new PermissionException("Not allowed to read outside the "
                    + "WebMapReduce home directory (" + _homeDir.toString()
                    + "). Please specify a relative path.");
    }
}
From source file:es.tid.cosmos.platform.injection.server.HadoopFileSystemViewTest.java
License:Open Source License
/**
 * @throws Exception
 */
@Test
public void testGetFileWithEmptyFilename() throws Exception {
    HadoopSshFile file = this.hadoopFileSystemView.getFile("");
    FileSystem hadoopFS = FileSystem.get(URI.create(this.conf.get("fs.default.name")), this.conf); /*, this.userName);*/
    String homePath = hadoopFS.getHomeDirectory().toString().replaceFirst(hadoopFS.getUri().toString(), "");
    assertEquals(homePath, file.getAbsolutePath());
    assertEquals(homePath.substring(homePath.lastIndexOf("/") + 1), file.getName());
}
From source file:es.tid.cosmos.platform.injection.server.HadoopFileSystemViewTest.java
License:Open Source License
/**
 * @throws Exception
 */
@Test
public void testRedirectionToHomePath() throws Exception {
    FileSystem fs = FileSystem.get(URI.create(this.conf.get("fs.default.name")), this.conf); //, "test");
    String homePath = fs.getHomeDirectory().toString().replaceFirst(fs.getUri().toString(), "");
    HadoopSshFile init = this.hadoopFileSystemView.getFile(new HadoopSshFile(".", "test", fs), ".");
    assertEquals(homePath, init.getAbsolutePath());
}
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java
License:Apache License
/**
 * Constructs a {@link SimpleHadoopFilesystemConfigStore} using a given {@link FileSystem} and a {@link URI}
 * that points to the physical location of the store root.
 *
 * @param fs the {@link FileSystem} the {@link ConfigStore} is stored on.
 * @param physicalStoreRoot the fully qualified {@link URI} of the physical store root; the {@link URI#getScheme()}
 *        of the {@link URI} should match the {@link FileSystem#getScheme()} of the given {@link FileSystem}.
 * @param logicalStoreRoot the fully qualified {@link URI} of the logical store root
 */
protected SimpleHadoopFilesystemConfigStore(FileSystem fs, URI physicalStoreRoot, URI logicalStoreRoot) {
    Preconditions.checkNotNull(fs, "fs cannot be null!");
    Preconditions.checkNotNull(physicalStoreRoot, "physicalStoreRoot cannot be null!");
    Preconditions.checkNotNull(logicalStoreRoot, "logicalStoreRoot cannot be null!");

    this.fs = fs;

    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getScheme()),
            "The physicalStoreRoot must have a valid scheme!");
    Preconditions.checkArgument(physicalStoreRoot.getScheme().equals(fs.getUri().getScheme()),
            "The scheme of the physicalStoreRoot and the filesystem must match!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getPath()),
            "The path of the physicalStoreRoot must be valid as it is the root of the store!");

    this.physicalStoreRoot = physicalStoreRoot;
    this.logicalStoreRoot = logicalStoreRoot;
    this.versions = CacheBuilder.newBuilder().build();
    this.storeMetadata = new SimpleHDFSStoreMetadata(fs, new Path(new Path(this.physicalStoreRoot), CONFIG_STORE_NAME));
}
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java
License:Apache License
/**
 * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory
 * on HDFS. It does this by taking the {@link URI} given by the user and back-tracing the path, checking whether
 * each parent directory contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It then
 * assumes this {@link Path} is the root directory.
 *
 * <p>
 * If the given configKey does not have an authority, this method assumes the given {@link URI#getPath()} does
 * not contain the dataset root. In that case it uses {@link #getDefaultRootDir()} as the root directory. If the
 * default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}, a
 * {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (getDefaultStoreURILazy() != null) {
            return getDefaultStoreURILazy();
        } else if (isAuthorityRequired()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
    }

    Path path = new Path(configKey.getPath());
    while (path != null) {
        try {
            // The absolute URI may point to a nonexistent path, because
            // 1. it is a phantom node, or
            // 2. the URI did not specify the version.
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory() && fileStatus.getPath().getName()
                            .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }
        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}
From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStore.java
License:Apache License
/**
 * Constructs a {@link SimpleHDFSConfigStore} using a given {@link FileSystem} and a {@link URI} that points to
 * the physical location of the store root.
 *
 * @param fs the {@link FileSystem} the {@link ConfigStore} is stored on.
 * @param physicalStoreRoot the fully qualified {@link URI} of the physical store root; the {@link URI#getScheme()}
 *        of the {@link URI} should match the {@link FileSystem#getScheme()} of the given {@link FileSystem}.
 * @param logicalStoreRoot the fully qualified {@link URI} of the logical store root
 */
protected SimpleHDFSConfigStore(FileSystem fs, URI physicalStoreRoot, URI logicalStoreRoot) {
    Preconditions.checkNotNull(fs, "fs cannot be null!");
    Preconditions.checkNotNull(physicalStoreRoot, "physicalStoreRoot cannot be null!");
    Preconditions.checkNotNull(logicalStoreRoot, "logicalStoreRoot cannot be null!");

    this.fs = fs;

    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getScheme()),
            "The physicalStoreRoot must have a valid scheme!");
    Preconditions.checkArgument(physicalStoreRoot.getScheme().equals(fs.getUri().getScheme()),
            "The scheme of the physicalStoreRoot and the filesystem must match!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(physicalStoreRoot.getPath()),
            "The path of the physicalStoreRoot must be valid as it is the root of the store!");

    this.physicalStoreRoot = physicalStoreRoot;
    this.logicalStoreRoot = logicalStoreRoot;
    this.versions = CacheBuilder.newBuilder().build();
    this.storeMetadata = new SimpleHDFSStoreMetadata(fs, new Path(new Path(this.physicalStoreRoot), CONFIG_STORE_NAME));
}
From source file:gobblin.config.store.hdfs.SimpleHDFSConfigStoreFactory.java
License:Apache License
/**
 * This method determines the physical location of the {@link SimpleHDFSConfigStore} root directory on HDFS. It
 * does this by taking the {@link URI} given by the user and back-tracing the path, checking whether each parent
 * directory contains the folder {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. It then assumes this
 * {@link Path} is the root directory.
 *
 * <p>
 * If the given configKey does not have an authority, this method assumes the given {@link URI#getPath()} does
 * not contain the dataset root. In that case it uses {@link #getDefaultRootDir()} as the root directory. If the
 * default root dir does not contain the {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}, a
 * {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (!hasDefaultStoreURI()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
        return this.defaultStoreURI.get();
    }

    Path path = new Path(configKey.getPath());
    while (path != null) {
        try {
            // The absolute URI may point to a nonexistent path, because
            // 1. it is a phantom node, or
            // 2. the URI did not specify the version.
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory()
                            && fileStatus.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }
        path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}
From source file:gobblin.data.management.copy.CopySource.java
License:Apache License
/**
 * Does the following:
 * <ul>
 * <li>Instantiate a {@link DatasetsFinder}.
 * <li>Find all {@link Dataset}s using the {@link DatasetsFinder}.
 * <li>For each {@link CopyableDataset}, get all {@link CopyEntity}s.
 * <li>Create a {@link WorkUnit} per {@link CopyEntity}.
 * </ul>
 *
 * <p>
 * In this implementation, one workunit is created for every {@link CopyEntity} found, but the extractor,
 * converters, and writers are built to support multiple {@link CopyEntity}s per workunit.
 * </p>
 *
 * @param state see {@link gobblin.configuration.SourceState}
 * @return Work units for copying files.
 */
@Override
public List<WorkUnit> getWorkunits(final SourceState state) {
    this.metricContext = Instrumented.getMetricContext(state, CopySource.class);

    try {
        DeprecationUtils.renameDeprecatedKeys(state,
                CopyConfiguration.MAX_COPY_PREFIX + "." + CopyResourcePool.ENTITIES_KEY,
                Lists.newArrayList(MAX_FILES_COPIED_KEY));

        final FileSystem sourceFs = getSourceFileSystem(state);
        final FileSystem targetFs = getTargetFileSystem(state);
        log.info("Identified source file system at {} and target file system at {}.",
                sourceFs.getUri(), targetFs.getUri());

        long maxSizePerBin = state.getPropAsLong(MAX_SIZE_MULTI_WORKUNITS, 0);
        long maxWorkUnitsPerMultiWorkUnit = state.getPropAsLong(MAX_WORK_UNITS_PER_BIN, 50);
        final long minWorkUnitWeight = Math.max(1, maxSizePerBin / maxWorkUnitsPerMultiWorkUnit);
        final Optional<CopyableFileWatermarkGenerator> watermarkGenerator =
                CopyableFileWatermarkHelper.getCopyableFileWatermarkGenerator(state);
        int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES,
                DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

        final CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(targetFs, state.getProperties()).build();

        DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils.instantiateDatasetFinder(
                state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY,
                new EventSubmitter.Builder(this.metricContext, CopyConfiguration.COPY_PREFIX).build(), state);

        IterableDatasetFinder<CopyableDatasetBase> iterableDatasetFinder =
                datasetFinder instanceof IterableDatasetFinder
                        ? (IterableDatasetFinder<CopyableDatasetBase>) datasetFinder
                        : new IterableDatasetFinderImpl<>(datasetFinder);

        Iterator<CopyableDatasetRequestor> requestorIteratorWithNulls = Iterators.transform(
                iterableDatasetFinder.getDatasetsIterator(),
                new CopyableDatasetRequestor.Factory(targetFs, copyConfiguration, log));
        Iterator<CopyableDatasetRequestor> requestorIterator =
                Iterators.filter(requestorIteratorWithNulls, Predicates.<CopyableDatasetRequestor>notNull());

        final SetMultimap<FileSet<CopyEntity>, WorkUnit> workUnitsMap =
                Multimaps.<FileSet<CopyEntity>, WorkUnit>synchronizedSetMultimap(
                        HashMultimap.<FileSet<CopyEntity>, WorkUnit>create());

        RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads);
        Iterator<FileSet<CopyEntity>> prioritizedFileSets =
                allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy());

        Iterator<Callable<Void>> callableIterator = Iterators.transform(prioritizedFileSets,
                new Function<FileSet<CopyEntity>, Callable<Void>>() {
                    @Nullable
                    @Override
                    public Callable<Void> apply(FileSet<CopyEntity> input) {
                        return new FileSetWorkUnitGenerator((CopyableDatasetBase) input.getDataset(), input,
                                state, workUnitsMap, watermarkGenerator, minWorkUnitWeight);
                    }
                });

        try {
            List<Future<Void>> futures = new IteratorExecutor<>(callableIterator, maxThreads,
                    ExecutorsUtils.newDaemonThreadFactory(Optional.of(log),
                            Optional.of("Copy-file-listing-pool-%d"))).execute();
            for (Future<Void> future : futures) {
                try {
                    future.get();
                } catch (ExecutionException exc) {
                    log.error("Failed to get work units for dataset.", exc.getCause());
                }
            }
        } catch (InterruptedException ie) {
            log.error("Retrieval of work units was interrupted. Aborting.");
            return Lists.newArrayList();
        }

        log.info(String.format("Created %s workunits ", workUnitsMap.size()));
        copyConfiguration.getCopyContext().logCacheStatistics();

        if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
            log.info("Simulate mode enabled. Will not execute the copy.");
            for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
                log.info(String.format("Actions for dataset %s file set %s.",
                        entry.getKey().getDataset().datasetURN(), entry.getKey().getName()));
                for (WorkUnit workUnit : entry.getValue()) {
                    CopyEntity copyEntity = deserializeCopyEntity(workUnit);
                    log.info(copyEntity.explain());
                }
            }
            return Lists.newArrayList();
        }

        List<? extends WorkUnit> workUnits = new WorstFitDecreasingBinPacking(maxSizePerBin)
                .pack(Lists.newArrayList(workUnitsMap.values()), this.weighter);
        log.info(String.format(
                "Bin packed work units. Initial work units: %d, packed work units: %d, max weight per bin: %d, "
                        + "max work units per bin: %d.",
                workUnitsMap.size(), workUnits.size(), maxSizePerBin, maxWorkUnitsPerMultiWorkUnit));
        return ImmutableList.copyOf(workUnits);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:gobblin.data.management.copy.replication.ConfigBasedDatasetsTest.java
License:Apache License
@Test
public void testGetCopyableFiles() throws Exception {
    String sourceDir = getClass().getClassLoader().getResource("configBasedDatasetTest/src").getFile();
    String destinationDir = getClass().getClassLoader().getResource("configBasedDatasetTest/dest").getFile();
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URI local = localFs.getUri();
    long sourceWatermark = 100L;

    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");

    CopyConfiguration copyConfiguration = CopyConfiguration
            .builder(FileSystem.getLocal(new Configuration()), properties)
            .publishDir(new Path(destinationDir))
            .preserve(PreserveAttributes.fromMnemonicString("ugp")).build();

    ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class);
    Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data");

    ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class);
    Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
    Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData);

    HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir));
    Mockito.when(copyFrom.getFsURI()).thenReturn(local);
    ComparableWatermark sw = new LongWatermark(sourceWatermark);
    Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw));
    Mockito.when(copyFrom.getFiles())
            .thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(sourceDir)));

    HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir));
    Mockito.when(copyTo.getFsURI()).thenReturn(local);
    Optional<ComparableWatermark> tmp = Optional.absent();
    Mockito.when(copyTo.getWatermark()).thenReturn(tmp);
    Mockito.when(copyTo.getFiles())
            .thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(destinationDir)));

    CopyRoute route = Mockito.mock(CopyRoute.class);
    Mockito.when(route.getCopyFrom()).thenReturn(copyFrom);
    Mockito.when(route.getCopyTo()).thenReturn(copyTo);

    ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route);
    Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(localFs, copyConfiguration);
    Assert.assertEquals(copyableFiles.size(), 6);

    Set<Path> paths = Sets.newHashSet(new Path("dir1/file2"), new Path("dir1/file1"),
            new Path("dir2/file1"), new Path("dir2/file3"));

    for (CopyEntity copyEntity : copyableFiles) {
        if (copyEntity instanceof CopyableFile) {
            CopyableFile file = (CopyableFile) copyEntity;
            Path originRelativePath = PathUtils.relativizePath(
                    PathUtils.getPathWithoutSchemeAndAuthority(file.getOrigin().getPath()),
                    PathUtils.getPathWithoutSchemeAndAuthority(new Path(sourceDir)));
            Path targetRelativePath = PathUtils.relativizePath(
                    PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination()),
                    PathUtils.getPathWithoutSchemeAndAuthority(new Path(destinationDir)));

            Assert.assertTrue(paths.contains(originRelativePath));
            Assert.assertTrue(paths.contains(targetRelativePath));
            Assert.assertEquals(originRelativePath, targetRelativePath);
        } else if (copyEntity instanceof PrePublishStep) {
            PrePublishStep pre = (PrePublishStep) copyEntity;
            Assert.assertTrue(pre.getStep() instanceof DeleteFileCommitStep);
            // need to delete this file
            Assert.assertTrue(pre.explain().indexOf("configBasedDatasetTest/dest/dir1/file1") > 0);
        } else if (copyEntity instanceof PostPublishStep) {
            PostPublishStep post = (PostPublishStep) copyEntity;
            Assert.assertTrue(post.getStep() instanceof WatermarkMetadataGenerationCommitStep);
            Assert.assertTrue(post.explain().indexOf("dest/_metadata") > 0
                    && post.explain().indexOf("" + sourceWatermark) > 0);
        } else {
            throw new Exception("Wrong type");
        }
    }
}