Example usage for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page collects example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
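
Before the real-world examples below, here is a minimal, self-contained sketch of the typical call pattern. The class name and the /tmp/example path are placeholders, not taken from any of the sources that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Placeholder path; substitute a path that exists on your file system.
        Path path = new Path("/tmp/example");
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            System.out.println(path + " is a directory");
        } else {
            System.out.println(path + " is a file of length " + status.getLen());
        }
    }
}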

Usage

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperator.java

License:Apache License

/**
 * Scans the directory for new files.
 */
protected void scanDirectory() {
    if (System.currentTimeMillis() - scanIntervalMillis >= lastScanMillis) {
        Set<Path> newPaths = scanner.scan(fs, filePath, processedFiles);

        for (Path newPath : newPaths) {
            try {
                FileStatus fileStatus = fs.getFileStatus(newPath);
                if (fileStatus.isDirectory()) {
                    checkVisitedDirectory(newPath);
                } else {
                    String newPathString = newPath.toString();
                    pendingFiles.add(newPathString);
                    processedFiles.add(newPathString);
                    localProcessedFileCount.increment();
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        lastScanMillis = System.currentTimeMillis();
    }
}
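
In this example isDirectory() routes directories to checkVisitedDirectory(), while plain files are queued in pendingFiles and recorded in processedFiles; the timestamp comparison at the top limits scanning to at most once per scanIntervalMillis.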

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileSplitter.java

License:Apache License

/**
 * Creates file-metadata and populates no. of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    LOG.debug("file {}", fileInfo.getFilePath());
    FileMetadata fileMetadata = createFileMetadata(fileInfo);
    Path path = new Path(fileInfo.getFilePath());

    fileMetadata.setFileName(path.getName());

    FileStatus status = getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (fileInfo.getDirectoryPath() == null) { // Direct filename is given as input.
        fileMetadata.setRelativePath(status.getPath().getName());
    } else {
        String relativePath = getRelativePathWithFolderName(fileInfo);
        fileMetadata.setRelativePath(relativePath);
    }

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}
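
The block-count expression is a ceiling division written in integer arithmetic: len / blockSize, plus one when the length is not an exact multiple of the block size. For example (illustrative numbers, not from the source), a length of 130 with a block size of 64 gives 130 / 64 = 2 with a nonzero remainder, hence 3 blocks.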

From source file:org.apache.apex.malhar.lib.io.fs.FileSplitter.java

License:Apache License

/**
 * Creates file-metadata and populates no. of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    String filePathStr = fileInfo.getFilePath();
    LOG.debug("file {}", filePathStr);
    FileMetadata fileMetadata = new FileMetadata(filePathStr);
    Path path = new Path(filePathStr);

    fileMetadata.setFileName(path.getName());

    FileStatus status = fs.getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}

From source file:org.apache.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Here I am just pointing the Containers to the existing HDFS resources given by the Client
 * on the filesystem of the nodes.
 *
 * @throws IOException
 */
private void localizeDFSResources() throws IOException {
    //if performing an 'offline' task, skip a lot of resource distribution
    if (obliterate || backup || restore) {
        if (appMasterJar == null || ("").equals(appMasterJar)) {
            //this can happen in a jUnit testing environment. we don't need to set it there.
            if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
                throw new IllegalStateException("AM jar not provided in environment.");
            } else {
                return;
            }
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar);
        LocalResource obliteratorJar = Records.newRecord(LocalResource.class);
        obliteratorJar.setType(LocalResourceType.FILE);
        obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE);
        obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar));
        obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime());
        obliteratorJar.setSize(appMasterJarStatus.getLen());
        localResources.put("asterix-yarn.jar", obliteratorJar);
        LOG.info(localResources.values());
        return;
    }
    //otherwise, distribute everything to start up asterix

    LocalResource asterixZip = Records.newRecord(LocalResource.class);

    //this un-tar's the asterix distribution
    asterixZip.setType(LocalResourceType.ARCHIVE);

    asterixZip.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath)));

    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath);
        throw new IOException(e);
    }

    asterixZip.setTimestamp(asterixZipTimestamp);
    asterixZip.setSize(asterixZipLen);
    localResources.put(ASTERIX_ZIP_NAME, asterixZip);

    //now let's do the same for the cluster description XML
    LocalResource asterixConf = Records.newRecord(LocalResource.class);
    asterixConf.setType(LocalResourceType.FILE);

    asterixConf.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath);
        throw new IOException(e);
    }
    //TODO: I could avoid localizing this everywhere by only calling this block on the metadata node.
    asterixConf.setTimestamp(asterixConfTimestamp);
    asterixConf.setSize(asterixConfLen);
    localResources.put("cluster-config.xml", asterixConf);
    //now add the libraries if there are any
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR);
        if (fs.exists(p)) {
            FileStatus[] dataverses = fs.listStatus(p);
            for (FileStatus d : dataverses) {
                if (!d.isDirectory())
                    throw new IOException("Library configuration directory structure is incorrect");
                FileStatus[] libraries = fs.listStatus(d.getPath());
                for (FileStatus l : libraries) {
                    if (l.isDirectory())
                        throw new IOException("Library configuration directory structure is incorrect");
                    LocalResource lr = Records.newRecord(LocalResource.class);
                    lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri()));
                    lr.setSize(l.getLen());
                    lr.setTimestamp(l.getModificationTime());
                    lr.setType(LocalResourceType.ARCHIVE);
                    lr.setVisibility(LocalResourceVisibility.PRIVATE);
                    localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR
                            + l.getPath().getName().split("\\.")[0], lr);
                    LOG.info("Found library: " + l.getPath().toString());
                    LOG.info(l.getPath().getName());
                }
            }
        }
    } catch (FileNotFoundException e) {
        LOG.info("No external libraries present");
        //do nothing, it just means there aren't libraries. that is possible and ok
        // it should be handled by the fs.exists(p) check though.
    }
    LOG.info(localResources.values());

}
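
Note the two complementary isDirectory() checks in the library walk: every child of the library directory must itself be a directory (a dataverse), and every entry inside a dataverse must be a file, otherwise the layout is rejected as incorrect.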

From source file:org.apache.asterix.metadata.utils.ExternalIndexingOperations.java

License:Apache License

private static void handleFile(Dataset dataset, List<ExternalFile> files, FileSystem fs, FileStatus fileStatus,
        int nextFileNumber) throws IOException {
    if (fileStatus.isDirectory()) {
        listSubFiles(dataset, fs, fileStatus, files);
    } else {
        files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), nextFileNumber,
                fileStatus.getPath().toUri().getPath(), new Date(fileStatus.getModificationTime()),
                fileStatus.getLen(), ExternalFilePendingOp.NO_OP));
    }
}

From source file:org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java

License:Apache License

private Set<Metadata> matchRecursiveGlob(String directorySpec, String fileSpec) throws IOException {
    final org.apache.hadoop.fs.FileSystem fs = new Path(directorySpec).getFileSystem(configuration);
    Set<Metadata> metadata = new HashSet<>();
    if (directorySpec.contains("*")) {
        // An abstract directory with a wildcard is converted to concrete directories to search.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }
    } else {
        // A concrete directory is searched.
        FileStatus[] fileStatuses = fs.globStatus(new Path(directorySpec + "/" + fileSpec));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                metadata.add(toMetadata(fileStatus));
            }
        }

        // All sub-directories of a concrete directory are searched.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec + "/*"));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }

        // Handle additional instances of recursive globs.
        if (fileSpec.contains("**")) {
            int index = fileSpec.indexOf("**");
            metadata.addAll(matchRecursiveGlob(directorySpec + "/" + fileSpec.substring(0, index + 1),
                    fileSpec.substring(index + 1)));
        }
    }
    return metadata;
}
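
In the final branch, fileSpec.substring(0, index + 1) keeps everything up to and including the first '*' of the "**", while fileSpec.substring(index + 1) keeps the remainder, so a pattern such as a/**/b recurses with directory spec .../a/* and remaining file spec */b.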

From source file:org.apache.camel.component.hdfs2.HdfsConsumer.java

License:Apache License

private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException {
    if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDirectory()) {
        Path successPath = new Path(status.getPath().toString() + "/_SUCCESS");
        if (!info.getFileSystem().exists(successPath)) {
            return true;
        }
    }
    return false;
}
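
The _SUCCESS marker tested here is the empty file that Hadoop's FileOutputCommitter writes into a job's output directory on successful completion (when success marking is enabled), so the method reports directories whose producing job has not yet finished.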

From source file:org.apache.distributedlog.fs.TestDLFileSystem.java

License:Apache License

@Test
public void testListStatuses() throws Exception {
    Path parentPath = new Path("/path/to/" + runtime.getMethodName());
    assertFalse(fs.exists(parentPath));
    try (FSDataOutputStream parentOut = fs.create(parentPath)) {
        parentOut.writeBytes("parent");
        parentOut.flush();
    }
    assertTrue(fs.exists(parentPath));

    int numLogs = 3;
    for (int i = 0; i < numLogs; i++) {
        Path path = new Path("/path/to/" + runtime.getMethodName() + "/" + runtime.getMethodName() + "-" + i);
        assertFalse(fs.exists(path));
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeBytes("line");
            out.flush();
        }
        assertTrue(fs.exists(path));
    }
    FileStatus[] files = fs.listStatus(new Path("/path/to/" + runtime.getMethodName()));

    assertEquals(3, files.length);
    for (int i = 0; i < numLogs; i++) {
        FileStatus file = files[i];
        assertEquals(4, file.getLen());
        assertFalse(file.isDirectory());
        assertEquals(3, file.getReplication());
        assertEquals(0L, file.getModificationTime());
        assertEquals(new Path("/path/to/" + runtime.getMethodName() + "/" + runtime.getMethodName() + "-" + i),
                file.getPath());
    }
}

From source file:org.apache.drill.exec.expr.fn.registry.RemoteFunctionRegistry.java

License:Apache License

/**
 * Concatenates the udf area with the root directory.
 * Creates the udf area if it does not exist.
 * Checks that the area exists, is a directory, and is writable for the current user;
 * throws {@link DrillRuntimeException} otherwise.
 *
 * @param fs file system where area should be created or checked
 * @param root root directory
 * @param directory directory path
 * @return path to area
 */
private Path createArea(FileSystem fs, String root, String directory) {
    Path path = new Path(new File(root, directory).toURI().getPath());
    String fullPath = path.toUri().getPath();
    try {
        fs.mkdirs(path);
        Preconditions.checkState(fs.exists(path), "Area [%s] must exist", fullPath);
        FileStatus fileStatus = fs.getFileStatus(path);
        Preconditions.checkState(fileStatus.isDirectory(), "Area [%s] must be a directory", fullPath);
        FsPermission permission = fileStatus.getPermission();
        // It is considered that process user has write rights on directory if:
        // 1. process user is owner of the directory and has write rights
        // 2. process user is in group that has write rights
        // 3. any user has write rights
        Preconditions.checkState(
                (ImpersonationUtil.getProcessUserName().equals(fileStatus.getOwner())
                        && permission.getUserAction().implies(FsAction.WRITE))
                        || (Sets.newHashSet(ImpersonationUtil.getProcessUserGroupNames()).contains(
                                fileStatus.getGroup()) && permission.getGroupAction().implies(FsAction.WRITE))
                        || permission.getOtherAction().implies(FsAction.WRITE),
                "Area [%s] must be writable and executable for application user", fullPath);
    } catch (Exception e) {
        if (e instanceof DrillRuntimeException) {
            throw (DrillRuntimeException) e;
        }
        // throws
        DrillRuntimeException.format(e, "Error during udf area creation [%s] on file system [%s]", fullPath,
                fs.getUri());
    }
    logger.info("Created remote udf area [{}] on file system [{}]", fullPath, fs.getUri());
    return path;
}

From source file:org.apache.drill.exec.planner.sql.handlers.AnalyzeTableHandler.java

License:Apache License

private boolean tableModified(DrillFileSystem fs, Path parentPath, long statsModificationTime)
        throws IOException {
    for (final FileStatus file : fs.listStatus(parentPath)) {
        // If directory or files within it are modified
        if (file.getModificationTime() > statsModificationTime) {
            return true;
        }
        // For a directory, we should recursively check sub-directories
        if (file.isDirectory() && tableModified(fs, file.getPath(), statsModificationTime)) {
            return true;
        }
    }
    return false;
}
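
Because the check recurses into sub-directories, a single file or directory modified anywhere under parentPath after statsModificationTime is enough to mark the whole table as modified.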