List of usage examples for org.apache.hadoop.fs.FileSystem listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
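Before the project-specific examples, a minimal, self-contained sketch of a typical call. The directory path, the default Configuration, and the class name ListFilesExample are illustrative assumptions, not taken from any of the source files listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Illustrative directory; replace with a real path on your cluster.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());
        // listFiles returns files only (no directories); recursive = true descends into subdirectories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}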
From source file:org.trend.hgraph.util.test.HGraphClientPerformanceTestTest.java
License:Apache License
private File mergeResults(Configuration conf, String outputPath, String tmpFileName)
        throws IOException, FileNotFoundException {
    Path path = new Path(outputPath);
    FileSystem fs = path.getFileSystem(conf);
    // Non-recursive listing of the job output directory.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false);
    LocatedFileStatus lfs = null;
    InputStream is = null;
    String fn = null;
    String content = null;
    File tf = File.createTempFile(tmpFileName, null);
    FileWriter tfw = new FileWriter(tf);
    while (it.hasNext()) {
        lfs = it.next();
        fn = lfs.getPath().getName();
        // Concatenate the contents of every part-* file into the temporary file.
        if (fn.startsWith("part-")) {
            System.out.println("content for file:" + fn);
            is = fs.open(lfs.getPath());
            content = IOUtils.toString(is);
            tfw.write(content);
            IOUtils.closeQuietly(is);
        }
    }
    IOUtils.closeQuietly(tfw);
    return tf;
}
From source file:org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java
License:Apache License
private void verifyFileSystem(FileSystem fs) throws IOException {
    fs.createNewFile(new Path(SOME_FILE_NAME));
    // Non-recursive listing of the configured directory; exactly one file is expected.
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(hdfsConfig.getPath(), false);
    assertThat("Creating file failed", files.hasNext(), equalTo(true));
    LocatedFileStatus remoteFile = files.next();
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(hdfsConfig.getPath().toUri().toString()));
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(SOME_FILE_NAME));
    assertThat("More than 1 file was created", files.hasNext(), equalTo(false));
    testAppendFunction(fs, remoteFile);
}
From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java
License:Apache License
private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }
}
From source file:uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.tool.ImportElementsToAccumuloTool.java
License:Apache License
private void checkHdfsDirectories(final String failurePathString, final FileSystem fs) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");

    final Path failurePath = new Path(failurePathString);
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}
From source file:webindex.data.LoadHdfs.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final WebIndexConfig webIndexConfig = WebIndexConfig.load();
    final int rateLimit = webIndexConfig.getLoadRateLimit();
    final String appName = webIndexConfig.fluoApp;

    // Recursively collect the paths of all files under the data directory.
    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {
        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());
        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
            fluoConfig.setApplicationName(appName);
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}