Example usage for org.apache.hadoop.hdfs DistributedFileSystem exists

List of usage examples for org.apache.hadoop.hdfs DistributedFileSystem exists


On this page you can find example usages of the exists method of org.apache.hadoop.hdfs.DistributedFileSystem.


public boolean exists(Path f) throws IOException 

Source Link


Check if a path exists.


From source file:com.cloudera.impala.analysis.LoadDataStmt.java

License:Apache License

/**
 * Analyzes the source path and the target table/partition of a LOAD DATA statement.
 *
 * <p>As far as this excerpt shows, the method checks that the INPATH exists, that a
 * directory source contains visible files and no subdirectories, that a file source is
 * not hidden, that the target partition (or the table, when no partition spec is given)
 * has write access, that source and destination are on the same file system, and that
 * each listed file's compression type is supported by the partition's file format.
 *
 * <p>NOTE(review): the excerpt looks truncated -- several closing braces and some call
 * arguments are missing -- so it does not compile as shown. Restore the dropped lines
 * from the original source before relying on it.
 *
 * @param analyzer  passed to the source path's own analyze() call
 * @param hdfsTable target table; supplies the partition, the location and the
 *                  write-access check
 * @throws AnalysisException if one of the checks above fails, or wrapping a
 *         FileNotFoundException / IOException raised while accessing the file system
 */
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have all permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);

    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        // sourceDataPath_.analyze() ensured that path is on an HDFS filesystem.
        Preconditions.checkState(fs instanceof DistributedFileSystem);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        // Reject a source path that does not exist on the file system.
        if (!dfs.exists(source)) {
            throw new AnalysisException(String.format("INPATH location '%s' does not exist.", sourceDataPath_));

        // A directory source must hold at least one visible file and no subdirectories.
        if (dfs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            if (FileSystemUtil.containsSubdirectory(source)) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
        } else { // INPATH points to a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' points to a hidden file.", source));

        // Shared prefix for both write-access errors below; the offending location is
        // appended at each throw site.
        // NOTE(review): the argument for the %s placeholder and the closing ");" are not
        // visible in this excerpt.
        String noWriteAccessErrorMsg = String.format(
                "Unable to LOAD DATA into "
                        + "target table (%s) because Impala does not have WRITE access to HDFS " + "location: ",

        // Resolve the destination: the partition named by the partition spec, or
        // otherwise the table's first partition together with the table location.
        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + partition.getLocation());
        } else {
            // "default" partition
            partition = hdfsTable.getPartitions().get(0);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());

        // Until Frontend.loadTableData() can handle cross-filesystem and filesystems
        // that aren't HDFS, require that source and dest are on the same HDFS.
        if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
            throw new AnalysisException(String.format(
                    "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                            + "destination %s (%s) are on different file-systems.",
                    hdfsTable.getFullName(), source, partitionSpec_ == null ? "table" : "partition",
        // NOTE(review): the fourth format argument and the end of the throw statement
        // appear to be missing from this excerpt.
        // Verify the files being loaded are supported.
        for (FileStatus fStatus : fs.listStatus(source)) {
            // NOTE(review): the statement guarded by this `if` is not visible --
            // presumably directories are skipped; confirm against the original source.
            if (fs.isDirectory(fStatus.getPath()))
            StringBuilder errorMsg = new StringBuilder();
            HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
            // errorMsg is handed to the check and becomes the exception text on failure.
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
    } catch (FileNotFoundException e) {
        // Both handlers rethrow as AnalysisException and keep the original cause.
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);

From source file:com.trace.hadoop.TestDFSRename.java

License:Apache License

/**
 * Perform operations such as setting quota, deletion of files, rename and
 * ensure system can apply edits log during startup.
 */
public void testEditsLog() throws Exception {
    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
    Path src1 = new Path(dir, "testEditsLog/srcdir/src1");
    Path dst1 = new Path(dir, "testEditsLog/dstdir/dst1");
    // Create both files up front: src1 is the rename source, and dst1 is deleted
    // again below before the rename.
    createFile(fs, src1);
    createFile(fs, dst1);

    // Set quota so that dst1 parent cannot allow under it new files/directories
    fs.setQuota(dst1.getParent(), 2, FSConstants.QUOTA_DONT_SET);
    // Free up quota for a subsequent rename
    fs.delete(dst1, true);
    // Arguments: renameSucceeds = true, quotaException = false.
    rename(src1, dst1, true, false);

    // Restart the cluster and ensure the above operations can be
    // loaded from the edits log
    // NOTE(review): no restart call is visible between this comment and the next
    // statement; it may have been dropped from the excerpt -- confirm against the
    // original source.
    fs = (DistributedFileSystem) cluster.getFileSystem();
    assertFalse(fs.exists(src1)); // ensure src1 is already renamed
    assertTrue(fs.exists(dst1)); // ensure rename dst exists
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:com.trace.hadoop.TestDFSRename.java

License:Apache License

/**
 * Renames {@code src} to {@code dst} on the cluster's file system and asserts the result.
 *
 * @param src            path to rename from
 * @param dst            path to rename to
 * @param renameSucceeds expected return value of the rename; also the expected
 *                       "src gone / dst present" state checked afterwards
 * @param quotaException not referenced in the visible lines -- presumably asserted
 *                       inside the catch block; TODO confirm against the original source
 * @throws Exception propagated from the file-system calls
 */
private void rename(Path src, Path dst, boolean renameSucceeds, boolean quotaException) throws Exception {
    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
    try {
        assertEquals(renameSucceeds, fs.rename(src, dst));
    } catch (QuotaExceededException ex) {
    // NOTE(review): the catch body and its closing brace appear to be missing from
    // this excerpt, as does the method's closing brace.
    assertEquals(renameSucceeds, !fs.exists(src));
    assertEquals(renameSucceeds, fs.exists(dst));

From source file:io.druid.indexer.HdfsClasspathSetupTest.java

License:Apache License

/**
 * Calls JobHelper.addSnapshotJarToClassPath for the dummy jar and asserts that the jar's
 * path under the intermediate directory is the job's classpath-files setting and that
 * the file at that path has the content of dummyJarString.
 *
 * @throws IOException propagated from the job setup and file-system calls
 */
public void testAddSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    Path intermediatePath = new Path("/tmp/classpath");
    JobHelper.addSnapshotJarToClassPath(dummyJarFile, intermediatePath, fs, job);
    Path expectedJarPath = new Path(intermediatePath, dummyJarFile.getName());
    // check file gets uploaded to HDFS
    // NOTE(review): no assertion follows the comment above in this excerpt --
    // presumably an fs.exists(expectedJarPath) check was dropped; confirm.
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    // The uploaded bytes, decoded as UTF-8, must equal the expected jar content.
    Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(fs.open(expectedJarPath))));
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:io.druid.indexer.HdfsClasspathSetupTest.java

License:Apache License

/**
 * Calls JobHelper.addJarToClassPath for the dummy jar and asserts that no copy remains
 * under intermediatePath, that the jar's path under finalClasspath is the job's
 * classpath-files setting, and that the file there has the content of dummyJarString.
 *
 * <p>intermediatePath and finalClasspath are not declared in this method; they are
 * presumably fields of the test class -- confirm against the original source.
 *
 * @throws IOException propagated from the job setup and file-system calls
 */
public void testAddNonSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    JobHelper.addJarToClassPath(dummyJarFile, finalClasspath, intermediatePath, fs, job);
    Path expectedJarPath = new Path(finalClasspath, dummyJarFile.getName());
    // check file gets uploaded to final HDFS path
    // NOTE(review): no assertion follows the comment above in this excerpt --
    // presumably an fs.exists(expectedJarPath) check was dropped; confirm.
    // check that the intermediate file gets deleted
    Assert.assertFalse(fs.exists(new Path(intermediatePath, dummyJarFile.getName())));
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    // The uploaded bytes, decoded as UTF-8, must equal the expected jar content.
    Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(fs.open(expectedJarPath))));
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:io.druid.indexer.HdfsClasspathSetupTest.java

License:Apache License

/**
 * Submits {@code concurrency} (10) tasks that each wait on a shared CyclicBarrier and
 * then call JobHelper.addJarToClassPath for the same jar with a per-task intermediate
 * path, and waits up to 30 seconds for all of them to finish.
 *
 * <p>NOTE(review): the excerpt is truncated -- the anonymous class, the submit call and
 * the loop are never closed, and some assertions announced by comments are absent.
 *
 * @throws IOException          declared by the original signature
 * @throws InterruptedException if the wait on the futures is interrupted
 * @throws ExecutionException   if any submitted task fails
 * @throws TimeoutException     if the tasks do not finish within 30 seconds
 */
public void testConcurrentUpload()
        throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final int concurrency = 10;
    ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrency));
    // barrier ensures that all jobs try to add files to classpath at same time.
    final CyclicBarrier barrier = new CyclicBarrier(concurrency);
    final DistributedFileSystem fs = miniCluster.getFileSystem();
    final Path expectedJarPath = new Path(finalClasspath, dummyJarFile.getName());
    List<ListenableFuture<Boolean>> futures = new ArrayList<>();

    for (int i = 0; i < concurrency; i++) {
        // NOTE(review): raw Callable here, while call() returns Boolean and the list
        // holds ListenableFuture<Boolean>; Callable<Boolean> would avoid the unchecked
        // conversion.
        futures.add(pool.submit(new Callable() {
            @Override
            public Boolean call() throws Exception {
                // await() returns this thread's arrival index, reused as a per-task id.
                int id = barrier.await();
                Job job = Job.getInstance(conf, "test-job-" + id);
                Path intermediatePathForJob = new Path(intermediatePath, "job-" + id);
                JobHelper.addJarToClassPath(dummyJarFile, finalClasspath, intermediatePathForJob, fs, job);
                // check file gets uploaded to final HDFS path
                // NOTE(review): the assertion for the comment above is not visible.
                // check that the intermediate file is not present
                Assert.assertFalse(fs.exists(new Path(intermediatePathForJob, dummyJarFile.getName())));
                // check file gets added to the classpath
                // NOTE(review): the assertion for the comment above is not visible.
                return true;

    // Fails the test if any task threw or if the 30-second limit is exceeded.
    Futures.allAsList(futures).get(30, TimeUnit.SECONDS);


From source file:org.apache.falcon.extensions.mirroring.hdfsSnapshot.HdfsSnapshotMirroringExtension.java

License:Apache License

/**
 * Validates the extension properties for HDFS snapshot mirroring.
 *
 * <p>Checks visible in this excerpt: every required property is set, the source and
 * target cluster entities are non-null, and the source and target paths both exist and
 * pass isDirSnapshotable.
 *
 * <p>NOTE(review): the excerpt is truncated -- several initializers stop mid-expression
 * and most closing braces are missing -- so it does not compile as shown.
 *
 * @param extensionProperties properties to validate
 * @throws FalconException if a check fails, or wrapping an IOException raised by the
 *         file-system calls
 */
public void validate(final Properties extensionProperties) throws FalconException {
    // A property is only an error when it is both absent and marked as required.
    for (HdfsSnapshotMirrorProperties option : HdfsSnapshotMirrorProperties.values()) {
        if (extensionProperties.getProperty(option.getName()) == null && option.isRequired()) {
            throw new FalconException("Missing extension property: " + option.getName());
        }

    // NOTE(review): the ClusterHelper call that resolves the source cluster is cut off.
    Cluster sourceCluster = ClusterHelper
    if (sourceCluster == null) {
        throw new FalconException(
                "SourceCluster entity " + HdfsSnapshotMirrorProperties.SOURCE_CLUSTER.getName() + " not found");
    // NOTE(review): the ClusterHelper call that resolves the target cluster is cut off.
    Cluster targetCluster = ClusterHelper
    if (targetCluster == null) {
        throw new FalconException(
                "TargetCluster entity " + HdfsSnapshotMirrorProperties.TARGET_CLUSTER.getName() + " not found");

    Configuration sourceConf = ClusterHelper.getConfiguration(sourceCluster);
    Configuration targetConf = ClusterHelper.getConfiguration(targetCluster);
    // NOTE(review): the rest of both HadoopClientFactory call chains is not visible.
    DistributedFileSystem sourceFileSystem = HadoopClientFactory.get()
    DistributedFileSystem targetFileSystem = HadoopClientFactory.get()

    // NOTE(review): the Path constructor arguments are not visible.
    Path sourcePath = new Path(
    Path targetPath = new Path(

    // check if source and target path's exist and are snapshot-able
    try {
        // Source: must exist, and if it does it must allow snapshots.
        if (sourceFileSystem.exists(sourcePath)) {
            if (!isDirSnapshotable(sourceFileSystem, sourcePath)) {
                throw new FalconException(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName() + " "
                        + sourcePath.toString() + " does not allow snapshots.");
        } else {
            throw new FalconException(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName() + " "
                    + sourcePath.toString() + " does not exist.");
        // Target: same two checks as for the source.
        if (targetFileSystem.exists(targetPath)) {
            if (!isDirSnapshotable(targetFileSystem, targetPath)) {
                throw new FalconException(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName() + " "
                        + targetPath.toString() + " does not allow snapshots.");
        } else {
            throw new FalconException(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName() + " "
                    + targetPath.toString() + " does not exist.");
    } catch (IOException e) {
        // Rethrow with the same message, keeping the IOException as the cause.
        throw new FalconException(e.getMessage(), e);


From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests that with the output to file option set, the scrutiny tool outputs invalid rows to file
 */
public void testOutputInvalidRowsToFile() throws Exception {

    // NOTE(review): argValues is not used in the visible lines; the call that runs the
    // scrutiny tool with it appears to have been dropped from this excerpt.
    String[] argValues = getArgValues(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L, SourceTable.DATA_TABLE_SOURCE, true, OutputFormat.FILE, null);

    // check the output files
    Path outputPath = CsvBulkImportUtil.getOutputPath(new Path(outputDir), dataTableFullName);
    DistributedFileSystem fs = getUtility().getDFSCluster().getFileSystem();
    List<Path> paths = Lists.newArrayList();
    Path firstPart = null;
    // Remember the first file whose name starts with "part".
    // NOTE(review): the else branch is empty in this excerpt -- presumably later part
    // files are added to `paths`; confirm against the original source.
    for (FileStatus outputFile : fs.listStatus(outputPath)) {
        if (outputFile.getPath().getName().startsWith("part")) {
            if (firstPart == null) {
                firstPart = outputFile.getPath();
            } else {
    // When the table DDL mentions SALT_BUCKETS, the files in `paths` are concatenated
    // onto the first part file before it is read.
    if (dataTableDdl.contains("SALT_BUCKETS")) {
        fs.concat(firstPart, paths.toArray(new Path[0]));
    Path outputFilePath = firstPart;
    FSDataInputStream fsDataInputStream = fs.open(outputFilePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream));
    // A TreeSet iterates in sorted order, so the assertions below do not depend on the
    // order in which lines were read.
    TreeSet<String> lines = Sets.newTreeSet();
    try {
        String line = null;
        // NOTE(review): the loop body and the finally body are not visible --
        // presumably each line is added to `lines` and the reader is closed; confirm.
        while ((line = reader.readLine()) != null) {
    } finally {
    Iterator<String> lineIterator = lines.iterator();
    assertEquals("[2, name-2, " + new Timestamp(testTime).toString() + ", 95123]\t[2, name-2, "
            + new Timestamp(testTime).toString() + ", 9999]", lineIterator.next());
    // NOTE(review): the second argument of the next assertion is cut off in this excerpt.
    assertEquals("[3, name-3, " + new Timestamp(testTime).toString() + ", 95123]\tTarget row not found",


From source file:org.apache.tajo.storage.TestByteBufLineReader.java

License:Apache License

/**
 * Starts a two-datanode MiniDFSCluster, creates /testReaderWithDFS/data.dat, opens it,
 * and asserts that the wrapped input stream is ByteBufferReadable and that
 * ByteBufLineReader returns LINE for each of two consecutive readLine() calls.
 *
 * <p>NOTE(review): the excerpt is truncated -- the writes to {@code out} and the body
 * of the finally block (presumably cluster shutdown) are not visible; confirm against
 * the original source.
 *
 * @throws Exception propagated from cluster start-up and file-system calls
 */
public void testReaderWithDFS() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    // A random UUID gives every run its own MiniDFS base directory under TEST_PATH.
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitClusterUp();

    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    Path tablePath = new Path("/testReaderWithDFS");
    Path filePath = new Path(tablePath, "data.dat");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();
        // Second argument `true` is the overwrite flag of create().
        FSDataOutputStream out = fs.create(filePath, true);
        // NOTE(review): nothing is written to or closed on `out` in the visible lines.

        FSDataInputStream inputStream = fs.open(filePath);
        assertTrue(inputStream.getWrappedStream() instanceof ByteBufferReadable);

        ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
        assertEquals(LINE, lineReader.readLine());
        assertEquals(LINE, lineReader.readLine());

    } finally {

From source file:org.apache.tajo.storage.TestFileStorageManager.java

License:Apache License

public void testGetSplit() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitClusterUp();//from  w w w  .j  a va 2  s  .co m
    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplit");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test partitions
        List<Path> partitions = Lists.newArrayList();
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, String.valueOf(i));
            DFSTestUtil.createFile(fs, new Path(tmpFile, "tmpfile.dat"), 10, (short) 2, 0xDEADDEADl);

        FileStorageManager sm = (FileStorageManager) StorageManager.getFileStorageManager(tajoConf);
        assertEquals(fs.getUri(), sm.getFileSystem().getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<Fragment> splits = Lists.newArrayList();
        // Get FileFragments in partition batch
        splits.addAll(sm.getSplits("data", meta, schema, partitions.toArray(new Path[partitions.size()])));
        assertEquals(testCount, splits.size());
        // -1 is unknown volumeId
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);

        splits.addAll(sm.getSplits("data", meta, schema,
                partitions.subList(0, partitions.size() / 2).toArray(new Path[partitions.size() / 2])));
        assertEquals(testCount / 2, splits.size());
        assertEquals(1, splits.get(0).getHosts().length);
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
    } finally {