List of usage examples for org.apache.hadoop.fs.FileSystem listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
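Before the project-specific examples, a minimal, self-contained sketch of a typical call. The directory path, the default Configuration, and the class name ListFilesExample are illustrative assumptions, not taken from any of the source files listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Illustrative directory; replace with a real path on your cluster.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());
        // listFiles returns files only (no directories); recursive = true descends into subdirectories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}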
From source file:org.trend.hgraph.util.test.HGraphClientPerformanceTestTest.java
License:Apache License
private File mergeResults(Configuration conf, String outputPath, String tmpFileName)
        throws IOException, FileNotFoundException {
    Path path = new Path(outputPath);
    FileSystem fs = path.getFileSystem(conf);
    // Non-recursive listing of the job output directory.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false);
    LocatedFileStatus lfs = null;
    InputStream is = null;
    String fn = null;
    String content = null;
    File tf = File.createTempFile(tmpFileName, null);
    FileWriter tfw = new FileWriter(tf);
    while (it.hasNext()) {
        lfs = it.next();
        fn = lfs.getPath().getName();
        // Concatenate the contents of every part-* file into the temporary file.
        if (fn.startsWith("part-")) {
            System.out.println("content for file:" + fn);
            is = fs.open(lfs.getPath());
            content = IOUtils.toString(is);
            tfw.write(content);
            IOUtils.closeQuietly(is);
        }
    }
    IOUtils.closeQuietly(tfw);
    return tf;
}
From source file:org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java
License:Apache License
private void verifyFileSystem(FileSystem fs) throws IOException {
    fs.createNewFile(new Path(SOME_FILE_NAME));
    // Non-recursive listing of the configured directory; exactly one file is expected.
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(hdfsConfig.getPath(), false);
    assertThat("Creating file failed", files.hasNext(), equalTo(true));
    LocatedFileStatus remoteFile = files.next();
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(hdfsConfig.getPath().toUri().toString()));
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(SOME_FILE_NAME));
    assertThat("More than 1 file was created", files.hasNext(), equalTo(false));
    testAppendFunction(fs, remoteFile);
}
From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java
License:Apache License
private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }
}
From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.ImportElementsToAccumuloTool.java
License:Apache License
private void checkHdfsDirectories(final String failurePathString, final FileSystem fs) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");

    final Path failurePath = new Path(failurePathString);
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}
From source file:webindex.data.LoadHdfs.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final WebIndexConfig webIndexConfig = WebIndexConfig.load();
    final int rateLimit = webIndexConfig.getLoadRateLimit();
    final String appName = webIndexConfig.fluoApp;

    // Recursively collect the paths of all files under the data directory.
    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {
        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());
        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
            fluoConfig.setApplicationName(appName);
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}