List of usage examples for org.apache.hadoop.fs FileSystem listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
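Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern. The `fs.defaultFS` URI and the `/data` path are placeholders; adjust them for your cluster:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder URI; point this at your NameNode.
        conf.set("fs.defaultFS", "hdfs://localhost:8020");
        try (FileSystem fs = FileSystem.get(conf)) {
            // Recursively iterate over all files (directories are skipped) under /data.
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
            while (it.hasNext()) {
                LocatedFileStatus status = it.next();
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}

Note that `listFiles` returns a lazy `RemoteIterator` whose `hasNext()` and `next()` can themselves throw `IOException`, which is why the examples below keep the iteration inside the caller's exception handling.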
From source file: io.fluo.webindex.data.LoadHdfs.java
License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final int rateLimit = DataConfig.load().getLoadRateLimit();

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {
        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());
        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}
From source file: io.fluo.webindex.data.spark.IndexEnv.java
License: Apache License

public static void validateDataDir(String dataDir) {
    try {
        FileSystem hdfs = getHDFS();
        Path dataPath = new Path(dataDir);
        if (!hdfs.exists(dataPath)) {
            log.error("HDFS data directory {} does not exist", dataDir);
            System.exit(-1);
        }
        RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(dataPath, true);
        while (listIter.hasNext()) {
            LocatedFileStatus status = listIter.next();
            if (status.isFile()) {
                return;
            }
        }
        log.error("HDFS data directory {} has no files", dataDir);
        System.exit(-1);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
From source file: nl.kpmg.lcm.server.data.hdfs.HdfsFileSystemAdapter.java
License: Apache License

@Override
public List listFileNames(String subPath) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", storage.getUrl());
    FileSystem hdfs = FileSystem.get(conf);
    String storagePath = "/" + storage.getPath() + "/" + subPath;
    Path filePath = new Path(storagePath);
    if (!hdfs.exists(filePath)) {
        return null;
    }
    RemoteIterator<LocatedFileStatus> fileList = hdfs.listFiles(filePath, false);
    LinkedList<String> fileNameList = new LinkedList<>();
    while (fileList.hasNext()) {
        LocatedFileStatus fileStatus = fileList.next();
        fileNameList.add(fileStatus.getPath().getName());
    }
    return fileNameList;
}
From source file: org.apache.accumulo.test.GarbageCollectWALIT.java
License: Apache License

private int countWALsInFS(MiniAccumuloClusterImpl cluster) throws Exception {
    FileSystem fs = cluster.getFileSystem();
    RemoteIterator<LocatedFileStatus> iterator =
        fs.listFiles(new Path(cluster.getConfig().getAccumuloDir() + "/wal"), true);
    int result = 0;
    while (iterator.hasNext()) {
        LocatedFileStatus next = iterator.next();
        if (!next.isDirectory()) {
            result++;
        }
    }
    return result;
}
From source file: org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java
License: Apache License

private Validation validateHDFSAccess(final Map<String, String> hadoopConfigs,
        AmbariServiceInfo hdfsServiceInfo) {
    if (hdfsServiceInfo != null && hdfsServiceInfo.isStarted()) {
        if (hadoopConfigs.containsKey("fs.defaultFS")) {
            try {
                invokeHDFSClientRunnable(new HDFSClientRunnable<Boolean>() {
                    @Override
                    public Boolean run(FileSystem fs) throws IOException, InterruptedException {
                        Path homePath = fs.getHomeDirectory();
                        fs.listFiles(homePath, false);
                        return Boolean.TRUE;
                    }
                }, hadoopConfigs);
            } catch (IOException | InterruptedException e) {
                String message = "Slider View requires access to user's home directory in HDFS to proceed. "
                    + "Contact your administrator to create the home directory. (" + e.getMessage() + ")";
                logger.warn(message, e);
                return new Validation(message);
            }
        } else {
            return new Validation(
                "Location of HDFS filesystem is unknown for verification. Please check the 'fs.defaultFS' config in core-site.xml");
        }
    }
    return null;
}
From source file: org.apache.coheigea.bigdata.hdfs.HDFSTest.java
License: Apache License

@org.junit.Test
public void testDirectoryPermissions() throws Exception {
    FileSystem fileSystem = hdfsCluster.getFileSystem();

    // Write a file
    final Path file = new Path("/tmp/tmpdir/data-file4");
    FSDataOutputStream out = fileSystem.create(file);
    for (int i = 0; i < 1024; ++i) {
        out.write(("data" + i + "\n").getBytes("UTF-8"));
        out.flush();
    }
    out.close();

    // Try to read the directory as "bob" - this should be allowed
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("bob");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());
            fs.close();
            return null;
        }
    });

    // Change permissions so that the directory can't be read by "other"
    fileSystem.setPermission(file.getParent(), new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE));

    // Try to read the base directory as the file owner
    RemoteIterator<LocatedFileStatus> iter = fileSystem.listFiles(file.getParent(), false);
    Assert.assertTrue(iter.hasNext());

    // Now try to read the directory as "bob" again - this should fail
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (AccessControlException ex) {
                // expected
            }
            fs.close();
            return null;
        }
    });
}
From source file: org.apache.coheigea.bigdata.hdfs.ranger.HDFSRangerTest.java
License: Apache License

@org.junit.Test
public void executeTest() throws Exception {
    FileSystem fileSystem = hdfsCluster.getFileSystem();

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir3/data-file2");
    FSDataOutputStream out = fileSystem.create(file);
    for (int i = 0; i < 1024; ++i) {
        out.write(("data" + i + "\n").getBytes("UTF-8"));
        out.flush();
    }
    out.close();

    // Change permissions to read-only
    fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

    // Change the parent directory permissions to be execute only for the owner
    Path parentDir = new Path("/tmp/tmpdir3");
    fileSystem.setPermission(parentDir, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE));

    // Try to read the directory as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());
            fs.close();
            return null;
        }
    });

    // Try to read the directory as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());
            fs.close();
            return null;
        }
    });

    // Now try to read the directory as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            // Try to list the directory
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (RemoteException ex) {
                // expected
                Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
            }
            fs.close();
            return null;
        }
    });

    // Now try to read the directory as known user "dave" - this should not be allowed,
    // as he doesn't have the correct permissions
    ugi = UserGroupInformation.createUserForTesting("dave", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);
            FileSystem fs = FileSystem.get(conf);
            // Try to list the directory
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (RemoteException ex) {
                // expected
                Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
            }
            fs.close();
            return null;
        }
    });
}
From source file: org.apache.drill.exec.rpc.user.TemporaryTablesAutomaticDropTest.java
License: Apache License

private File createAndCheckSessionTemporaryLocation(String suffix, File schemaLocation) throws Exception {
    String temporaryTableName = "temporary_table_automatic_drop_" + suffix;
    File sessionTemporaryLocation = schemaLocation.toPath().resolve(SESSION_UUID.toString()).toFile();

    test("create TEMPORARY table %s.%s as select 'A' as c1 from (values(1))", DFS_TMP_SCHEMA,
        temporaryTableName);

    FileSystem fs = getLocalFileSystem();
    Path sessionPath = new Path(sessionTemporaryLocation.getAbsolutePath());
    assertTrue("Session temporary location should exist", fs.exists(sessionPath));
    assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(),
        fs.getFileStatus(sessionPath).getPermission());

    Path tempTablePath = new Path(sessionPath, SESSION_UUID.toString());
    assertTrue("Temporary table location should exist", fs.exists(tempTablePath));
    assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(),
        fs.getFileStatus(tempTablePath).getPermission());

    RemoteIterator<LocatedFileStatus> fileIterator = fs.listFiles(tempTablePath, false);
    while (fileIterator.hasNext()) {
        LocatedFileStatus file = fileIterator.next();
        assertEquals("File permission should match", StorageStrategy.TEMPORARY.getFilePermission(),
            file.getPermission());
    }
    return sessionTemporaryLocation;
}
From source file: org.apache.drill.exec.udf.dynamic.TestDynamicUDFSupport.java
License: Apache License

@Test
public void testSuccessfulRegistration() throws Exception {
    copyDefaultJarsToStagingArea();

    String summary = "The following UDFs in jar %s have been registered:\n"
        + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("create function using jar '%s'", defaultBinaryJar).unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format(summary, defaultBinaryJar))
        .go();

    RemoteFunctionRegistry remoteFunctionRegistry = getDrillbitContext().getRemoteFunctionRegistry();
    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Staging area should be empty",
        fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
        fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    Path path = hadoopToJavaPath(remoteFunctionRegistry.getRegistryArea());
    assertTrue("Binary should be present in registry area",
        path.resolve(defaultBinaryJar).toFile().exists());
    assertTrue("Source should be present in registry area",
        path.resolve(defaultSourceJar).toFile().exists());

    Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
    assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
    assertEquals(registry.getJar(0).getName(), defaultBinaryJar);
}
From source file: org.apache.drill.exec.udf.dynamic.TestDynamicUDFSupport.java
License: Apache License

@Test
public void testSuccessfulUnregistrationAfterSeveralRetryAttempts() throws Exception {
    RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    copyDefaultJarsToStagingArea();
    test("create function using jar '%s'", defaultBinaryJar);

    reset(remoteFunctionRegistry);
    doThrow(new VersionMismatchException("Version mismatch detected", 1))
        .doThrow(new VersionMismatchException("Version mismatch detected", 1))
        .doCallRealMethod()
        .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    String summary = "The following UDFs in jar %s have been unregistered:\n"
        + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("drop function using jar '%s'", defaultBinaryJar).unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format(summary, defaultBinaryJar))
        .go();

    verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    FileSystem fs = remoteFunctionRegistry.getFs();
    assertFalse("Registry area should be empty",
        fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
    assertEquals("Registry should be empty",
        remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}