Example usage for org.apache.hadoop.fs FileSystem listFiles

List of usage examples for org.apache.hadoop.fs FileSystem listFiles

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path.
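
Before the full examples below, here is a minimal, self-contained sketch of the call (assuming an HDFS reachable through a core-site.xml on the classpath; the /data path is a placeholder). Note that listFiles returns only files, never directories, and the recursive flag controls whether subdirectories are traversed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Picks up fs.defaultFS from the classpath configuration; defaults to the local filesystem otherwise
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Recursively list every file under /data (directories themselves are not returned)
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(new Path("/data"), true);
        while (iter.hasNext()) {
            LocatedFileStatus status = iter.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}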

Usage

From source file: io.fluo.webindex.data.LoadHdfs.java

License: Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final int rateLimit = DataConfig.load().getLoadRateLimit();

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}

From source file: io.fluo.webindex.data.spark.IndexEnv.java

License: Apache License

public static void validateDataDir(String dataDir) {
    try {
        FileSystem hdfs = getHDFS();
        Path dataPath = new Path(dataDir);
        if (!hdfs.exists(dataPath)) {
            log.error("HDFS data directory {} does not exist", dataDir);
            System.exit(-1);
        }
        RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(dataPath, true);
        while (listIter.hasNext()) {
            LocatedFileStatus status = listIter.next();
            if (status.isFile()) {
                return;
            }
        }
        log.error("HDFS data directory {} has no files", dataDir);
        System.exit(-1);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file: nl.kpmg.lcm.server.data.hdfs.HdfsFileSystemAdapter.java

License: Apache License

@Override
public List listFileNames(String subPath) throws IOException {

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", storage.getUrl());
    FileSystem hdfs = FileSystem.get(conf);
    String storagePath = "/" + storage.getPath() + "/" + subPath;
    Path filePath = new Path(storagePath);
    if (!hdfs.exists(filePath)) {
        return null;
    }

    RemoteIterator<LocatedFileStatus> fileList = hdfs.listFiles(filePath, false);
    LinkedList<String> fileNameList = new LinkedList<>();

    while (fileList.hasNext()) {
        LocatedFileStatus fileStatus = fileList.next();
        fileNameList.add(fileStatus.getPath().getName());
    }

    return fileNameList;
}

From source file: org.apache.accumulo.test.GarbageCollectWALIT.java

License: Apache License

private int countWALsInFS(MiniAccumuloClusterImpl cluster) throws Exception {
    FileSystem fs = cluster.getFileSystem();
    RemoteIterator<LocatedFileStatus> iterator = fs
            .listFiles(new Path(cluster.getConfig().getAccumuloDir() + "/wal"), true);
    int result = 0;
    while (iterator.hasNext()) {
        LocatedFileStatus next = iterator.next();
        if (!next.isDirectory()) {
            result++;
        }
    }
    return result;
}

From source file: org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java

License: Apache License

private Validation validateHDFSAccess(final Map<String, String> hadoopConfigs,
        AmbariServiceInfo hdfsServiceInfo) {
    if (hdfsServiceInfo != null && hdfsServiceInfo.isStarted()) {
        if (hadoopConfigs.containsKey("fs.defaultFS")) {
            try {
                invokeHDFSClientRunnable(new HDFSClientRunnable<Boolean>() {
                    @Override
                    public Boolean run(FileSystem fs) throws IOException, InterruptedException {
                        Path homePath = fs.getHomeDirectory();
                        fs.listFiles(homePath, false);
                        return Boolean.TRUE;
                    }
                }, hadoopConfigs);
            } catch (IOException e) {
                String message = "Slider View requires access to user's home directory in HDFS to proceed. Contact your administrator to create the home directory. ("
                        + e.getMessage() + ")";
                logger.warn(message, e);
                return new Validation(message);
            } catch (InterruptedException e) {
                String message = "Slider View requires access to user's home directory in HDFS to proceed. Contact your administrator to create the home directory. ("
                        + e.getMessage() + ")";
                logger.warn(message, e);
                return new Validation(message);
            }
        } else {
            return new Validation(
                    "Location of HDFS filesystem is unknown for verification. Please check the 'fs.defaultFS' config in core-site.xml");
        }
    }
    return null;
}

From source file: org.apache.coheigea.bigdata.hdfs.HDFSTest.java

License: Apache License

@org.junit.Test
public void testDirectoryPermissions() throws Exception {
    FileSystem fileSystem = hdfsCluster.getFileSystem();

    // Write a file
    final Path file = new Path("/tmp/tmpdir/data-file4");
    FSDataOutputStream out = fileSystem.create(file);
    for (int i = 0; i < 1024; ++i) {
        out.write(("data" + i + "\n").getBytes("UTF-8"));
        out.flush();
    }
    out.close();

    // Try to read the directory as "bob" - this should be allowed
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("bob");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());

            fs.close();
            return null;
        }
    });

    // Change permissions so that the directory can't be read by "other"
    fileSystem.setPermission(file.getParent(), new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE));

    // Try to read the base directory as the file owner
    RemoteIterator<LocatedFileStatus> iter = fileSystem.listFiles(file.getParent(), false);
    Assert.assertTrue(iter.hasNext());

    // Now try to read the directory as "bob" again - this should fail
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (AccessControlException ex) {
                // expected
            }

            fs.close();
            return null;
        }
    });
}

From source file: org.apache.coheigea.bigdata.hdfs.ranger.HDFSRangerTest.java

License: Apache License

@org.junit.Test
public void executeTest() throws Exception {
    FileSystem fileSystem = hdfsCluster.getFileSystem();

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir3/data-file2");
    FSDataOutputStream out = fileSystem.create(file);
    for (int i = 0; i < 1024; ++i) {
        out.write(("data" + i + "\n").getBytes("UTF-8"));
        out.flush();
    }
    out.close();

    // Change permissions to read-only
    fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

    // Change the parent directory permissions to be execute only for the owner
    Path parentDir = new Path("/tmp/tmpdir3");
    fileSystem.setPermission(parentDir, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE));

    // Try to read the directory as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());

            fs.close();
            return null;
        }
    });

    // Try to read the directory as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());

            fs.close();
            return null;
        }
    });

    // Now try to read the directory as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            // Write to the file
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (RemoteException ex) {
                // expected
                Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
            }

            fs.close();
            return null;
        }
    });

    // Now try to read the directory as known user "dave" - this should not be allowed, as he doesn't have the correct permissions
    ugi = UserGroupInformation.createUserForTesting("dave", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            // Write to the file
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (RemoteException ex) {
                // expected
                Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
            }

            fs.close();
            return null;
        }
    });
}

From source file: org.apache.drill.exec.rpc.user.TemporaryTablesAutomaticDropTest.java

License: Apache License

private File createAndCheckSessionTemporaryLocation(String suffix, File schemaLocation) throws Exception {
    String temporaryTableName = "temporary_table_automatic_drop_" + suffix;
    File sessionTemporaryLocation = schemaLocation.toPath().resolve(SESSION_UUID.toString()).toFile();

    test("create TEMPORARY table %s.%s as select 'A' as c1 from (values(1))", DFS_TMP_SCHEMA,
            temporaryTableName);

    FileSystem fs = getLocalFileSystem();
    Path sessionPath = new Path(sessionTemporaryLocation.getAbsolutePath());
    assertTrue("Session temporary location should exist", fs.exists(sessionPath));
    assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(),
            fs.getFileStatus(sessionPath).getPermission());
    Path tempTablePath = new Path(sessionPath, SESSION_UUID.toString());
    assertTrue("Temporary table location should exist", fs.exists(tempTablePath));
    assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(),
            fs.getFileStatus(tempTablePath).getPermission());
    RemoteIterator<LocatedFileStatus> fileIterator = fs.listFiles(tempTablePath, false);
    while (fileIterator.hasNext()) {
        LocatedFileStatus file = fileIterator.next();
        assertEquals("File permission should match", StorageStrategy.TEMPORARY.getFilePermission(),
                file.getPermission());
    }
    return sessionTemporaryLocation;
}

From source file: org.apache.drill.exec.udf.dynamic.TestDynamicUDFSupport.java

License: Apache License

@Test
public void testSuccessfulRegistration() throws Exception {
    copyDefaultJarsToStagingArea();

    String summary = "The following UDFs in jar %s have been registered:\n"
            + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("create function using jar '%s'", defaultBinaryJar).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(true, String.format(summary, defaultBinaryJar))
            .go();

    RemoteFunctionRegistry remoteFunctionRegistry = getDrillbitContext().getRemoteFunctionRegistry();
    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Staging area should be empty",
            fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
            fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    Path path = hadoopToJavaPath(remoteFunctionRegistry.getRegistryArea());

    assertTrue("Binary should be present in registry area", path.resolve(defaultBinaryJar).toFile().exists());
    assertTrue("Source should be present in registry area", path.resolve(defaultBinaryJar).toFile().exists());

    Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
    assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
    assertEquals(registry.getJar(0).getName(), defaultBinaryJar);
}

From source file: org.apache.drill.exec.udf.dynamic.TestDynamicUDFSupport.java

License: Apache License

@Test
public void testSuccessfulUnregistrationAfterSeveralRetryAttempts() throws Exception {
    RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    copyDefaultJarsToStagingArea();
    test("create function using jar '%s'", defaultBinaryJar);

    reset(remoteFunctionRegistry);
    doThrow(new VersionMismatchException("Version mismatch detected", 1))
            .doThrow(new VersionMismatchException("Version mismatch detected", 1)).doCallRealMethod()
            .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    String summary = "The following UDFs in jar %s have been unregistered:\n"
            + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("drop function using jar '%s'", defaultBinaryJar).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(true, String.format(summary, defaultBinaryJar))
            .go();

    verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Registry area should be empty",
            fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
    assertEquals("Registry should be empty",
            remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}