Example usage for org.apache.hadoop.hdfs DistributedFileSystem listStatus

List of usage examples for org.apache.hadoop.hdfs DistributedFileSystem listStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.hdfs DistributedFileSystem listStatus.

Prototype

@Override
public FileStatus[] listStatus(Path p) throws IOException 

Source Link

Document

List all the entries of a directory. Note that this operation is not atomic for a large directory.

Usage

From source file:alluxio.underfs.hdfs.LocalMiniDFSCluster.java

License:Apache License

/**
 * Manual smoke test for the local mini DFS cluster; run as a standalone program.
 */
public static void main(String[] args) throws Exception {
    LocalMiniDFSCluster miniCluster = null;
    try {
        // First round: single-datanode cluster bound to a fixed port.
        miniCluster = new LocalMiniDFSCluster("/tmp/dfs", 1, 54321);
        miniCluster.start();
        System.out.println("Address of local minidfscluster: " + miniCluster.getUnderFilesystemAddress());
        Thread.sleep(10);
        DistributedFileSystem client = miniCluster.getDFSClient();
        client.mkdirs(new Path("/1"));
        mkdirs(miniCluster.getUnderFilesystemAddress() + "/1/2");
        FileStatus[] entries = client.listStatus(new Path(AlluxioURI.SEPARATOR));
        assert entries.length != 0;
        System.out.println(entries[0].getPath().toUri());
        client.close();

        miniCluster.shutdown();

        // Second round: three-datanode cluster on a system-assigned port.
        miniCluster = new LocalMiniDFSCluster("/tmp/dfs", 3);
        miniCluster.start();
        System.out.println("Address of local minidfscluster: " + miniCluster.getUnderFilesystemAddress());

        client = miniCluster.getDFSClient();
        client.mkdirs(new Path("/1"));

        UnderFileSystemUtils
                .touch(miniCluster.getUnderFilesystemAddress() + "/1" + "/_format_" + System.currentTimeMillis());
        entries = client.listStatus(new Path("/1"));
        assert entries.length != 0;
        System.out.println(entries[0].getPath().toUri());
        client.close();

        miniCluster.shutdown();
    } finally {
        // Make sure a still-running cluster is torn down even if a step above threw.
        if (miniCluster != null && miniCluster.isStarted()) {
            miniCluster.shutdown();
        }
    }
}

From source file:com.streamsets.datacollector.hdfs.cluster.KafkaToHDFSIT.java

License:Apache License

@Test(timeout = 120000)
public void testKafkaToHDFSOnCluster() throws Exception {
    List<URI> list = miniSDC.getListOfSlaveSDCURI();
    Assert.assertTrue(list != null && !list.isEmpty());

    Map<String, Map<String, Object>> countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
    Assert.assertNotNull(countersMap);
    // Poll until the source has produced all expected records; the @Test timeout bounds the wait.
    while (VerifyUtils.getSourceOutputRecords(countersMap) != RECORDS_PRODUCED) {
        LOG.debug("Source output records are not equal to " + RECORDS_PRODUCED + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    // Likewise wait for the target to have consumed the expected subset.
    while (VerifyUtils.getTargetInputRecords(countersMap) != RECORDS_REACHING_TARGET) {
        LOG.debug("Target Input records are not equal to " + RECORDS_REACHING_TARGET + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    //HDFS configuration is set to roll file after 15 records.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/" + TestUtil.getCurrentYear()));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked one reader (and HDFS stream) per output file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(f.getPath())))) {
            String line = br.readLine();
            while (line != null) {
                Assert.assertTrue(line.contains("Hello Kafka"));
                // Position 11 holds the record index; only even-indexed records should reach HDFS.
                int j = Integer.parseInt(line.substring(11, 12));
                Assert.assertTrue(j % 2 == 0);
                recordsRead++;
                line = br.readLine();
            }
        }
    }

    Assert.assertEquals(RECORDS_REACHING_TARGET, recordsRead);
}

From source file:com.streamsets.datacollector.hdfs.cluster.TestKafkaToHDFS.java

License:Apache License

@Test(timeout = 120000)
public void testKafkaToHDFSOnCluster() throws Exception {
    List<URI> list = miniSDC.getListOfSlaveSDCURI();
    Assert.assertTrue(list != null && !list.isEmpty());

    Map<String, Map<String, Object>> countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
    Assert.assertNotNull(countersMap);
    // Poll until the source has produced all expected records; the @Test timeout bounds the wait.
    while (VerifyUtils.getSourceOutputRecords(countersMap) != RECORDS_PRODUCED) {
        LOG.debug("Source output records are not equal to " + RECORDS_PRODUCED + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    // Likewise wait for the target to have consumed the expected subset.
    while (VerifyUtils.getTargetInputRecords(countersMap) != RECORDS_REACHING_TARGET) {
        LOG.debug("Target Input records are not equal to " + RECORDS_REACHING_TARGET + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    //HDFS configuration is set to roll file after 15 records.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    //resolve ${YYYY()} instead of hardcoding 2015
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/2015"));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked one reader (and HDFS stream) per output file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(f.getPath())))) {
            String line = br.readLine();
            while (line != null) {
                Assert.assertTrue(line.contains("Hello Kafka"));
                // Position 11 holds the record index; only even-indexed records should reach HDFS.
                int j = Integer.parseInt(line.substring(11, 12));
                Assert.assertTrue(j % 2 == 0);
                recordsRead++;
                line = br.readLine();
            }
        }
    }

    Assert.assertEquals(RECORDS_REACHING_TARGET, recordsRead);
}

From source file:com.streamsets.datacollector.hdfs.standalone.HdfsDestinationPipelineRunIT.java

License:Apache License

@Override
protected int getRecordsInTarget() throws IOException {
    // Count the total number of lines across every file the pipeline wrote
    // under /tmp/out/<current year> on the mini DFS cluster.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/" + TestUtil.getCurrentYear()));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original never closed the reader, leaking an HDFS stream per file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(f.getPath())))) {
            String line = br.readLine();
            while (line != null) {
                recordsRead++;
                line = br.readLine();
            }
        }
    }
    return recordsRead;
}

From source file:com.streamsets.datacollector.hdfs.standalone.TestHdfsDestinationPipelineRun.java

License:Apache License

@Override
protected int getRecordsInTarget() throws IOException {
    // Count the total number of lines across every file the pipeline wrote
    // under /tmp/out/2015 on the mini DFS cluster.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    //resolve ${YYYY()} instead of hardcoding 2015
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/2015"));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original never closed the reader, leaking an HDFS stream per file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(f.getPath())))) {
            String line = br.readLine();
            while (line != null) {
                recordsRead++;
                line = br.readLine();
            }
        }
    }
    return recordsRead;
}

From source file:org.apache.ambari.servicemonitor.probes.DfsListProbe.java

License:Apache License

@Override
public ProbeStatus ping(boolean livePing) {

    // Probe HDFS health by listing the configured path; any IOException counts as failure.
    ProbeStatus status = new ProbeStatus();
    DistributedFileSystem hdfs = null;
    try {
        hdfs = createDfs();
        Path dfsPath = new Path(path);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Listing " + getName());
        }
        FileStatus[] listing = hdfs.listStatus(dfsPath);
        if (listing == null) {
            // Nothing came back: report the path as missing.
            status.finish(this, false, "Path " + path + " not found", null);
        } else {
            // The listing succeeded, so the filesystem is reachable.
            status.succeed(this);
            status.setMessage(getName() + " contains " + listing.length + " entries");
        }
    } catch (IOException e) {
        // Wrap the failure with the probe name for context, preserving the cause.
        status.fail(this, new IOException(getName() + " : " + e, e));
        LOG.debug("Failure to probe " + getName());
    } finally {
        DFSUtils.closeDFS(hdfs);
    }
    return status;
}

From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java

License:Apache License

/**
 * Finds the most recent target snapshot that also exists on the source,
 * i.e. the latest snapshot known to have been replicated.
 * Returns null when the target has no snapshots, or none of them match a source snapshot.
 */
private String findLatestReplicatedSnapshot(DistributedFileSystem sourceFs, DistributedFileSystem targetFs,
        String sourceDir, String targetDir) throws FalconException {
    try {
        // Gather the names of every snapshot present on the source.
        Set<String> sourceSnapshotNames = new HashSet<>();
        for (FileStatus sourceSnapshot : sourceFs.listStatus(new Path(getSnapshotDir(sourceDir)))) {
            sourceSnapshotNames.add(sourceSnapshot.getPath().getName());
        }

        FileStatus[] targetSnapshots = targetFs.listStatus(new Path(getSnapshotDir(targetDir)));
        if (targetSnapshots.length > 0) {
            // Newest first, by modification time.
            Arrays.sort(targetSnapshots, new Comparator<FileStatus>() {
                @Override
                public int compare(FileStatus left, FileStatus right) {
                    return Long.compare(right.getModificationTime(), left.getModificationTime());
                }
            });

            // The first target snapshot also present on the source is the latest replicated one.
            for (FileStatus candidate : targetSnapshots) {
                String snapshotName = candidate.getPath().getName();
                if (sourceSnapshotNames.contains(snapshotName)) {
                    return snapshotName;
                }
            }
            // Snapshots exist on the target, but none were replicated from the source.
        }
        // Either no target snapshots, or no match found.
        return null;
    } catch (IOException e) {
        LOG.error("Unable to find latest snapshot on targetDir {} {}", targetDir, e.getMessage());
        throw new FalconException("Unable to find latest snapshot on targetDir " + targetDir, e);
    }
}

From source file:org.apache.falcon.snapshots.retention.HdfsSnapshotEvictor.java

License:Apache License

/**
 * Deletes snapshots older than {@code ageLimit}, while always retaining at least
 * the {@code numSnapshots} most recently modified ones.
 */
protected static void evictSnapshots(DistributedFileSystem fs, String dirName, String ageLimit,
        int numSnapshots) throws FalconException {
    try {
        LOG.info("Started evicting snapshots on dir {}{} using policy {}, agelimit {}, numSnapshot {}",
                fs.getUri(), dirName, ageLimit, numSnapshots);

        // Anything last modified before this instant is old enough to evict.
        long evictionTime = System.currentTimeMillis() - EvictionHelper.evalExpressionToMilliSeconds(ageLimit);

        dirName = StringUtils.removeEnd(dirName, Path.SEPARATOR);
        String snapshotDir = dirName + Path.SEPARATOR + HdfsSnapshotUtil.SNAPSHOT_DIR_PREFIX + Path.SEPARATOR;
        FileStatus[] allSnapshots = fs.listStatus(new Path(snapshotDir));
        if (allSnapshots.length <= numSnapshots) {
            // Already at or below the retention count: nothing to do.
            return;
        }

        // Oldest first, by modification time.
        Arrays.sort(allSnapshots, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus left, FileStatus right) {
                return Long.compare(left.getModificationTime(), right.getModificationTime());
            }
        });

        // Walk the oldest snapshots, stopping short of the newest numSnapshots,
        // and delete those past the age limit.
        int evictable = allSnapshots.length - numSnapshots;
        for (int idx = 0; idx < evictable; idx++) {
            if (allSnapshots[idx].getModificationTime() < evictionTime) {
                fs.deleteSnapshot(new Path(dirName), allSnapshots[idx].getPath().getName());
            }
        }

    } catch (ELException ele) {
        LOG.warn("Unable to parse retention age limit {} {}", ageLimit, ele.getMessage());
        throw new FalconException("Unable to parse retention age limit " + ageLimit, ele);
    } catch (IOException ioe) {
        LOG.warn("Unable to evict snapshots from dir {} {}", dirName, ioe);
        throw new FalconException("Unable to evict snapshots from dir " + dirName, ioe);
    }

}

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests that with the output to file option set, the scrutiny tool outputs invalid rows to file.
 */
@Test
public void testOutputInvalidRowsToFile() throws Exception {
    // Seed the tables: one valid row, one with a bad value, one missing from the target.
    insertOneValid_OneBadVal_OneMissingTarget();

    String[] argValues = getArgValues(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L, SourceTable.DATA_TABLE_SOURCE, true, OutputFormat.FILE, null);
    runScrutiny(argValues);

    // check the output files
    Path outputPath = CsvBulkImportUtil.getOutputPath(new Path(outputDir), dataTableFullName);
    DistributedFileSystem fs = getUtility().getDFSCluster().getFileSystem();
    // Collect the "part" files: the first one found becomes the merge target,
    // the rest are candidates to concatenate into it.
    List<Path> paths = Lists.newArrayList();
    Path firstPart = null;
    for (FileStatus outputFile : fs.listStatus(outputPath)) {
        if (outputFile.getPath().getName().startsWith("part")) {
            if (firstPart == null) {
                firstPart = outputFile.getPath();
            } else {
                paths.add(outputFile.getPath());
            }
        }
    }
    // Salted tables produce multiple part files; merge them into firstPart before reading.
    // NOTE(review): non-salted runs are assumed to emit a single part file — confirm.
    if (dataTableDdl.contains("SALT_BUCKETS")) {
        fs.concat(firstPart, paths.toArray(new Path[0]));
    }
    Path outputFilePath = firstPart;
    assertTrue(fs.exists(outputFilePath));
    FSDataInputStream fsDataInputStream = fs.open(outputFilePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream));
    // Read every line into a sorted set so the assertions below are order-independent.
    TreeSet<String> lines = Sets.newTreeSet();
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            lines.add(line);
        }
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(fsDataInputStream);
    }
    // Expect exactly the two invalid rows: the bad-value row (index value 9999
    // instead of 95123) and the row with no matching target.
    Iterator<String> lineIterator = lines.iterator();
    assertEquals("[2, name-2, " + new Timestamp(testTime).toString() + ", 95123]\t[2, name-2, "
            + new Timestamp(testTime).toString() + ", 9999]", lineIterator.next());
    assertEquals("[3, name-3, " + new Timestamp(testTime).toString() + ", 95123]\tTarget row not found",
            lineIterator.next());

}

From source file:tachyon.LocalMiniDFSCluster.java

License:Apache License

/**
 * Test the local minidfscluster only/*from   w w  w.  j a  v a 2  s.  co  m*/
 */
public static void main(String[] args) throws Exception {
    LocalMiniDFSCluster cluster = null;
    TachyonConf tachyonConf = new TachyonConf();
    try {
        cluster = new LocalMiniDFSCluster("/tmp/dfs", 1, 54321, tachyonConf);
        cluster.start();
        System.out.println("Address of local minidfscluster: " + cluster.getUnderFilesystemAddress());
        Thread.sleep(10);
        DistributedFileSystem dfs = cluster.getDFSClient();
        dfs.mkdirs(new Path("/1"));
        mkdirs(cluster.getUnderFilesystemAddress() + "/1/2", tachyonConf);
        FileStatus[] fs = dfs.listStatus(new Path(TachyonURI.SEPARATOR));
        assert fs.length != 0;
        System.out.println(fs[0].getPath().toUri());
        dfs.close();

        cluster.shutdown();

        cluster = new LocalMiniDFSCluster("/tmp/dfs", 3, tachyonConf);
        cluster.start();
        System.out.println("Address of local minidfscluster: " + cluster.getUnderFilesystemAddress());

        dfs = cluster.getDFSClient();
        dfs.mkdirs(new Path("/1"));

        CommonUtils.touch(cluster.getUnderFilesystemAddress() + "/1" + "/_format_" + System.currentTimeMillis(),
                tachyonConf);
        fs = dfs.listStatus(new Path("/1"));
        assert fs.length != 0;
        System.out.println(fs[0].getPath().toUri());
        dfs.close();

        cluster.shutdown();
    } finally {
        if (cluster != null && cluster.isStarted()) {
            cluster.shutdown();
        }
    }
}