Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

This page collects example usages of the getPath method of org.apache.hadoop.fs.LocatedFileStatus.

Prototype

public Path getPath() 

Usage
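
As a quick orientation, here is a minimal, self-contained sketch of the pattern every example below shares: obtain a RemoteIterator<LocatedFileStatus> from FileSystem.listFiles() and read the Path of each entry with getPath() (inherited from FileStatus). The directory name /tmp/example and the class name ListFilePaths are placeholder assumptions, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilePaths {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/example"); // placeholder directory
        FileSystem fs = dir.getFileSystem(new Configuration());
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false); // false = no recursion
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // getPath() returns the fully qualified Path of the listed file
            System.out.println(status.getPath());
        }
    }
}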

From source file: org.trend.hgraph.util.test.GetRandomRowsByRegionsTest.java

License: Apache License

@Test
public void test_run_b2t3() throws Exception {
    String outputPath = "/run_b2t3";
    GetRandomRowsByRegions tool = new GetRandomRowsByRegions(TEST_UTIL.getConfiguration());
    int status = tool.run(new String[] { "-b", "2", "-t", "3", TABLE, outputPath });
    Assert.assertEquals(0, status);
    // get content, for manual test purpose
    Path path = new Path(outputPath);
    FileSystem fs = path.getFileSystem(TEST_UTIL.getConfiguration());
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false);
    LocatedFileStatus lfs = null;
    InputStream is = null;
    String fn = null;
    while (it.hasNext()) {
        lfs = it.next();
        fn = lfs.getPath().getName();
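        // job output files follow the MapReduce part-* naming convention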
        if (fn.startsWith("part-")) {
            System.out.println("content for file:" + fn);
            is = fs.open(lfs.getPath());
            System.out.println(IOUtils.toString(is));
            IOUtils.closeQuietly(is);
        }
    }
}

From source file: org.trend.hgraph.util.test.HGraphClientPerformanceTestTest.java

License: Apache License

private File mergeResults(Configuration conf, String outputPath, String tmpFileName) throws IOException {
    Path path = new Path(outputPath);
    FileSystem fs = path.getFileSystem(conf);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, false);
    LocatedFileStatus lfs = null;
    InputStream is = null;
    String fn = null;
    String content = null;
    File tf = File.createTempFile(tmpFileName, null);
    FileWriter tfw = new FileWriter(tf);
    while (it.hasNext()) {
        lfs = it.next();
        fn = lfs.getPath().getName();
        if (fn.startsWith("part-")) {
            System.out.println("content for file:" + fn);
            is = fs.open(lfs.getPath());
            content = IOUtils.toString(is);
            tfw.write(content);
            IOUtils.closeQuietly(is);
        }
    }
    IOUtils.closeQuietly(tfw);
    return tf;
}

From source file: org.trustedanalytics.metadata.parser.MetadataParseTask.java

License: Apache License

private void processDirectory(Path sourcePath, List<InputStream> inputStreams) throws IOException {
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(sourcePath, false);
    while (iterator.hasNext()) {
        LocatedFileStatus status = iterator.next();
        if (status.isFile()) {
            LOG.info("File found: {}", status.getPath());
            processFile(status.getPath(), inputStreams);
        }
    }
}
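
Worth noting: listFiles() surfaces only files (directories are not returned by the iterator), so the status.isFile() guard above appears to be defensive rather than strictly required.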

From source file: org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java

License: Apache License

private void verifyFileSystem(FileSystem fs) throws IOException {
    fs.createNewFile(new Path(SOME_FILE_NAME));
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(hdfsConfig.getPath(), false);
    assertThat("Creating file failed", files.hasNext(), equalTo(true));
    LocatedFileStatus remoteFile = files.next();
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(hdfsConfig.getPath().toUri().toString()));
    assertThat("file wasn't created in proper folder", remoteFile.getPath().toUri().toString(),
            containsString(SOME_FILE_NAME));
    assertThat("More then 1 file was created", files.hasNext(), equalTo(false));
    testAppendFunction(fs, remoteFile);
}

From source file: org.trustedanalytics.utils.hdfs.HdfsConfigFactoryTest.java

License: Apache License

private void testAppendFunction(FileSystem fs, LocatedFileStatus remoteFile) throws IOException {
    try (OutputStream out = fs.append(remoteFile.getPath())) {
        out.write(APPEND_TEXT.getBytes());
    }

    try (InputStreamReader in = new InputStreamReader(fs.open(remoteFile.getPath()));
            BufferedReader br = new BufferedReader(in)) {
        String content = br.readLine();
        assert br.readLine() == null; // plain Java assert: only enforced when the JVM runs with -ea
        assertThat("File was not appended properly", content, containsString(APPEND_TEXT));
    }
}
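
Note that append() is an optional operation in the Hadoop FileSystem API and not every implementation supports it; the checksummed local file system, for instance, rejects appends, so a test like this one generally needs HDFS or another append-capable file system behind it.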

From source file: webindex.data.LoadHdfs.java

License: Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final WebIndexConfig webIndexConfig = WebIndexConfig.load();
    final int rateLimit = webIndexConfig.getLoadRateLimit();
    final String appName = webIndexConfig.fluoApp;

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
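    // the boolean argument controls recursion: "true" descends into subdirectories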
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
            fluoConfig.setApplicationName(appName);
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            // try-with-resources closes the input stream (and the reader built on it) even if parsing fails
                            try (FSDataInputStream fsin = fs.open(filePath)) {
                                ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                                for (ArchiveRecord record : reader) {
                                    Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                    if (page.getOutboundLinks().size() > 0) {
                                        log.info("Loading page {} with {} links", page.getUrl(),
                                                page.getOutboundLinks().size());
                                        if (rateLimiter != null) {
                                            rateLimiter.acquire();
                                        }
                                        le.execute(PageLoader.updatePage(page));
                                    }
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}