List of usage examples for org.apache.hadoop.fs.LocatedFileStatus.getPath()
public Path getPath()
From source file:org.trend.hgraph.util.test.GetRandomRowsByRegionsTest.java
License:Apache License
@Test public void test_run_b2t3() throws Exception { String outputPath = "/run_b2t3"; GetRandomRowsByRegions tool = new GetRandomRowsByRegions(TEST_UTIL.getConfiguration()); int status = tool.run(new String[] { "-b", "2", "-t", "3", TABLE, outputPath }); Assert.assertEquals(0, status);/*from w ww . jav a 2 s . co m*/ // get content, for manual test purpose Path path = new Path(outputPath); FileSystem fs = path.getFileSystem(TEST_UTIL.getConfiguration()); RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false); LocatedFileStatus lfs = null; InputStream is = null; String fn = null; while (it.hasNext()) { lfs = it.next(); fn = lfs.getPath().getName(); if (fn.startsWith("part-")) { System.out.println("content for file:" + fn); is = fs.open(lfs.getPath()); System.out.println(IOUtils.toString(is)); IOUtils.closeQuietly(is); } } }
From source file:org.trend.hgraph.util.test.HGraphClientPerformanceTestTest.java
License:Apache License
private File mergeResults(Configuration conf, String outputPath, String tmpFileName) throws IOException, FileNotFoundException { Path path = new Path(outputPath); FileSystem fs = path.getFileSystem(conf); RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false); LocatedFileStatus lfs = null; InputStream is = null;//from w w w . j ava 2 s . c om String fn = null; String content = null; File tf = File.createTempFile(tmpFileName, null); FileWriter tfw = new FileWriter(tf); while (it.hasNext()) { lfs = it.next(); fn = lfs.getPath().getName(); if (fn.startsWith("part-")) { System.out.println("content for file:" + fn); is = fs.open(lfs.getPath()); content = IOUtils.toString(is); tfw.write(content); IOUtils.closeQuietly(is); } } IOUtils.closeQuietly(tfw); return tf; }
From source file:org.trustedanalytics.metadata.parser.MetadataParseTask.java
License:Apache License
private void processDirectory(Path sourcePath, List<InputStream> inputStreams) throws IOException { RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(sourcePath, false); while (iterator.hasNext()) { LocatedFileStatus status = iterator.next(); if (status.isFile()) { LOG.info("File found: {}", status.getPath()); processFile(status.getPath(), inputStreams); }// w w w.j a va2s . c o m } }
From source file:org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java
License:Apache License
private void verifyFileSystem(FileSystem fs) throws IOException { fs.createNewFile(new Path(SOME_FILE_NAME)); RemoteIterator<LocatedFileStatus> files = fs.listFiles(hdfsConfig.getPath(), false); assertThat("Creating file failed", files.hasNext(), equalTo(true)); LocatedFileStatus remoteFile = files.next(); assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(), containsString(hdfsConfig.getPath().toUri().toString())); assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(), containsString(SOME_FILE_NAME)); assertThat("More then 1 file was created", files.hasNext(), equalTo(false)); testAppendFunction(fs, remoteFile);//from w ww . j a v a 2 s . c o m }
From source file:org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java
License:Apache License
/**
 * Appends {@code APPEND_TEXT} to the given file, reads the file back, and checks that it
 * consists of exactly one line containing the appended text.
 *
 * @param fs         file system under test
 * @param remoteFile file to append to and read back
 * @throws IOException if appending to or reading the file fails
 */
private void testAppendFunction(FileSystem fs, LocatedFileStatus remoteFile) throws IOException {
  // Write and read with the same explicit charset rather than the platform default.
  try (OutputStream out = fs.append(remoteFile.getPath())) {
    out.write(APPEND_TEXT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  }

  try (InputStreamReader in = new InputStreamReader(fs.open(remoteFile.getPath()),
          java.nio.charset.StandardCharsets.UTF_8);
      BufferedReader br = new BufferedReader(in)) {
    String content = br.readLine();
    // Explicit check instead of the `assert` keyword, which is skipped when the JVM runs
    // without -ea.
    if (br.readLine() != null) {
      throw new AssertionError("File contains more than one line after append");
    }
    assertThat("File was not appended properly", content, containsString(APPEND_TEXT));
  }
}
From source file:webindex.data.LoadHdfs.java
License:Apache License
/**
 * Entry point: recursively lists every file under the given HDFS data directory and loads the
 * pages of each file into Fluo through a Spark job, one Spark partition per file.
 *
 * <p>Each file is opened as a WARC archive. Only pages with at least one outbound link are
 * loaded. When the configured load rate limit is positive, a per-partition
 * {@code RateLimiter} throttles the loads. An {@link IOException} while processing a file is
 * logged and that file is skipped.
 *
 * @param args exactly one element: the data directory; otherwise a usage message is logged and
 *             the JVM exits with status 1
 * @throws Exception if configuration loading, HDFS listing, or the Spark job fails
 */
public static void main(String[] args) throws Exception {
  if (args.length != 1) {
    log.error("Usage: LoadHdfs <dataDir>");
    System.exit(1);
  }
  final String dataDir = args[0];
  IndexEnv.validateDataDir(dataDir);

  final String hadoopConfDir = IndexEnv.getHadoopConfDir();
  final WebIndexConfig webIndexConfig = WebIndexConfig.load();
  final int rateLimit = webIndexConfig.getLoadRateLimit();
  final String appName = webIndexConfig.fluoApp;

  List<String> loadPaths = new ArrayList<>();
  FileSystem hdfs = IndexEnv.getHDFS();
  RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
  while (listIter.hasNext()) {
    LocatedFileStatus status = listIter.next();
    if (status.isFile()) {
      loadPaths.add(status.getPath().toString());
    }
  }

  // The partition count below is loadPaths.size(); with no files that would ask Spark for
  // zero partitions, so stop before starting a Spark context.
  if (loadPaths.isEmpty()) {
    log.info("No files found under {}; nothing to load", dataDir);
    return;
  }

  log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

  SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
  try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

    JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

    paths.foreachPartition(iter -> {
      final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
      fluoConfig.setApplicationName(appName);
      final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
      FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
      try (FluoClient client = FluoFactory.newClient(fluoConfig);
          LoaderExecutor le = client.newLoaderExecutor()) {
        iter.forEachRemaining(path -> {
          Path filePath = new Path(path);
          try {
            if (fs.exists(filePath)) {
              // Close the HDFS stream once the archive has been consumed (or on failure);
              // the reader is built on top of this stream.
              try (FSDataInputStream fsin = fs.open(filePath)) {
                ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                for (ArchiveRecord record : reader) {
                  Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                  if (page.getOutboundLinks().size() > 0) {
                    log.info("Loading page {} with {} links", page.getUrl(),
                        page.getOutboundLinks().size());
                    if (rateLimiter != null) {
                      rateLimiter.acquire();
                    }
                    le.execute(PageLoader.updatePage(page));
                  }
                }
              }
            }
          } catch (IOException e) {
            log.error("Exception while processing {}", path, e);
          }
        });
      }
    });
  }
}