Example usage for org.apache.hadoop.fs FileSystem listFiles

Introduction

This page collects example usage of the org.apache.hadoop.fs.FileSystem method listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path. If the path is a directory, a non-recursive call returns the files directly in it, while a recursive call returns the files in the whole subtree rooted at the path; directories themselves are never returned.
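
Before the real-world examples below, here is a minimal self-contained sketch of the typical call pattern (the /data directory is a placeholder):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(conf);
        // listFiles returns files only (never directories); with
        // recursive = true it also descends into subdirectories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}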

Usage

From source file:org.trend.hgraph.util.test.HGraphClientPerformanceTestTest.java

License:Apache License
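
This test helper merges the output of a MapReduce-style job: it lists the files directly under the output directory (recursive = false) and appends the contents of every part-* file to a single temporary file.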

private File mergeResults(Configuration conf, String outputPath, String tmpFileName)
        throws IOException, FileNotFoundException {
    Path path = new Path(outputPath);
    FileSystem fs = path.getFileSystem(conf);
    // Non-recursive listing: only the files directly under outputPath
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false);
    LocatedFileStatus lfs = null;
    InputStream is = null;
    String fn = null;
    String content = null;
    File tf = File.createTempFile(tmpFileName, null);
    FileWriter tfw = new FileWriter(tf);
    while (it.hasNext()) {
        lfs = it.next();
        fn = lfs.getPath().getName();
        if (fn.startsWith("part-")) {
            System.out.println("content for file:" + fn);
            is = fs.open(lfs.getPath());
            content = IOUtils.toString(is);
            tfw.write(content);
            IOUtils.closeQuietly(is);
        }
    }
    IOUtils.closeQuietly(tfw);
    return tf;
}

From source file:org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java

License:Apache License
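
This test creates a single file and then uses a non-recursive listing of the configured directory to assert that exactly one file exists, in the expected folder, with the expected name.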

private void verifyFileSystem(FileSystem fs) throws IOException {
    fs.createNewFile(new Path(SOME_FILE_NAME));
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(hdfsConfig.getPath(), false);
    assertThat("Creating file failed", files.hasNext(), equalTo(true));
    LocatedFileStatus remoteFile = files.next();
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(hdfsConfig.getPath().toUri().toString()));
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(SOME_FILE_NAME));
    assertThat("More then 1 file was created", files.hasNext(), equalTo(false));
    testAppendFunction(fs, remoteFile);//from  w w w  .j av  a  2 s. c  o  m
}

From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java

License:Apache License
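
Here a recursive listing is used only as an emptiness check: if the output directory exists and contains any file, the tool aborts; if it exists but is empty, it is deleted.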

private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }
}

From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.ImportElementsToAccumuloTool.java

License:Apache License
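
The same emptiness check is applied to the failure directory: it must be empty if it already exists, and is created (and given Accumulo-friendly permissions) if it does not.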

private void checkHdfsDirectories(final String failurePathString, final FileSystem fs) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");

    final Path failurePath = new Path(failurePathString);
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}

From source file:webindex.data.LoadHdfs.java

License:Apache License
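
This loader recursively lists every file under a data directory, collects the file paths, and parallelizes them across Spark partitions so each executor can stream its WARC files into Fluo.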

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final WebIndexConfig webIndexConfig = WebIndexConfig.load();
    final int rateLimit = webIndexConfig.getLoadRateLimit();
    final String appName = webIndexConfig.fluoApp;

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
            fluoConfig.setApplicationName(appName);
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}