Example usage for org.apache.hadoop.fs.FileStatus.getPath()

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath()

Source Link

Usage

From source file:com.cloudera.hoop.fs.FSUtils.java

License:Open Source License

/**
 * Builds the JSON representation of a Hadoop <code>FileStatus</code> as an
 * insertion-ordered map. The <code>SCHEME://HOST:PORT</code> prefix of the
 * status path is replaced with the supplied URL.
 *
 * @param status Hadoop file status to convert.
 * @param hoopBaseUrl base URL that takes the place of the
 * <code>SCHEME://HOST:PORT</code> part of the file status path.
 * @return the JSON representation of the file status, keyed by attribute name.
 */
@SuppressWarnings("unchecked")
public static Map fileStatusToJSON(FileStatus status, String hoopBaseUrl) {
    // Rewrite the path up front so that everything below is a plain attribute copy.
    String hoopPath = convertPathToHoop(status.getPath(), hoopBaseUrl).toString();
    String permission = permissionToString(status.getPermission());

    // LinkedHashMap keeps the attributes in the order they are added.
    Map attributes = new LinkedHashMap();
    attributes.put("path", hoopPath);
    attributes.put("isDir", status.isDir());
    attributes.put("len", status.getLen());
    attributes.put("owner", status.getOwner());
    attributes.put("group", status.getGroup());
    attributes.put("permission", permission);
    attributes.put("accessTime", status.getAccessTime());
    attributes.put("modificationTime", status.getModificationTime());
    attributes.put("blockSize", status.getBlockSize());
    attributes.put("replication", status.getReplication());
    return attributes;
}

From source file:com.cloudera.impala.analysis.LoadDataStmt.java

License:Apache License

/**
 * Validates the source and destination paths of this LOAD DATA statement.
 * <p>
 * Checks performed, in order:
 * <ul>
 * <li>the source path is analyzed with {@code Privilege.ALL};</li>
 * <li>the source exists on a {@code DistributedFileSystem};</li>
 * <li>a source directory holds at least one visible file and no subdirectories,
 *     while a source file must not be hidden;</li>
 * <li>the destination (partition, or the table when no partition spec is given)
 *     has write access;</li>
 * <li>source and destination are on the same file system;</li>
 * <li>every file to load uses a compression type supported by the destination's
 *     file format.</li>
 * </ul>
 *
 * @param analyzer analyzer used when analyzing the source path.
 * @param hdfsTable target table of the load.
 * @throws AnalysisException if any of the checks fails or the file system cannot
 *     be accessed.
 */
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have all permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);

    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        // sourceDataPath_.analyze() ensured that path is on an HDFS filesystem.
        Preconditions.checkState(fs instanceof DistributedFileSystem);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        if (!dfs.exists(source)) {
            throw new AnalysisException(String.format("INPATH location '%s' does not exist.", sourceDataPath_));
        }

        if (dfs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            }
            if (FileSystemUtil.containsSubdirectory(source)) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
            }
        } else { // INPATH points to a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' points to a hidden file.", source));
            }
        }

        String noWriteAccessErrorMsg = String.format(
                "Unable to LOAD DATA into target table (%s) because Impala does not have WRITE access "
                        + "to HDFS location: ",
                hdfsTable.getFullName());

        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + location);
            }
        } else {
            // "default" partition
            partition = hdfsTable.getPartitions().get(0);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + location);
            }
        }
        Preconditions.checkNotNull(partition);

        // Until Frontend.loadTableData() can handle cross-filesystem and filesystems
        // that aren't HDFS, require that source and dest are on the same HDFS.
        if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
            // Report the same location that was just checked; for an unpartitioned load
            // that is the table location, matching the "table" label in the message.
            throw new AnalysisException(String.format(
                    "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                            + "destination %s (%s) are on different file-systems.",
                    hdfsTable.getFullName(), source, partitionSpec_ == null ? "table" : "partition",
                    location));
        }

        // Verify the files being loaded are supported. The destination format does not
        // change per file, so look it up once.
        HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
        for (FileStatus fStatus : fs.listStatus(source)) {
            // The listing already says whether the entry is a directory; asking the
            // file system again would cost one more lookup per entry.
            if (fStatus.isDirectory()) {
                continue;
            }
            StringBuilder errorMsg = new StringBuilder();
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
            }
        }
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Creates a new HdfsPartition object to be added to the internal partition list.
 * Populates with file format information and file locations. Partitions may be empty,
 * or may not even exist on the file system (a partition's location may have been
 * changed to a new path that is about to be created by an INSERT). For unchanged
 * files (indicated by unchanged length and mtime), reuses the FileDescriptor from the
 * oldFileDescMap. The one exception is if the partition is marked as cached
 * in which case the block metadata cannot be reused. Otherwise, creates a new
 * FileDescriptor for each modified or new file and passes it, together with
 * perFsFileDescMap, to addPerFsFileDesc().
 * Separated from addPartition to reduce the number of operations done
 * while holding the lock on the hdfs table.
 * <p>
 * Side effects on this table: may set multipleFileSystems_, appends every accepted
 * file to fileDescMap_ (keyed by the file's parent directory) and increases
 * numHdfsFiles_ by the number of files found for the partition.
 *
 * @param storageDescriptor storage descriptor that supplies the partition location
 *    and the file format information.
 * @param msPartition metastore partition; may be null, in which case no partition
 *    key values are loaded and the table-level cached flag is used.
 * @param oldFileDescMap map of parent directory (partition location) to the list of
 *    previously loaded FileDescriptors under that directory; may be null.
 * @param perFsFileDescMap per-filesystem map handed to addPerFsFileDesc() for each
 *    newly created FileDescriptor.
 * @return the newly created partition.
 * @throws CatalogException
 *    if a partition key value cannot be turned into a literal of the column type, or
 *    if any exception is raised while listing files or building the partition.
 *    NOTE(review): HdfsStorageDescriptor.fromStorageDescriptor() presumably also
 *    raises it for metadata Impala can't understand - confirm against that method.
 */
private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
        org.apache.hadoop.hive.metastore.api.Partition msPartition,
        Map<String, List<FileDescriptor>> oldFileDescMap,
        Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescMap) throws CatalogException {
    HdfsStorageDescriptor fileFormatDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_,
            storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    // If the partition is marked as cached, the block location metadata must be
    // reloaded, even if the file times have not changed.
    boolean isMarkedCached = isMarkedCached_;
    List<LiteralExpr> keyValues = Lists.newArrayList();
    if (msPartition != null) {
        // A partition-level setting overrides the table-level default read above.
        isMarkedCached = HdfsCachingUtil.getCacheDirIdFromParams(msPartition.getParameters()) != null;
        // Load key values
        for (String partitionKey : msPartition.getValues()) {
            // The i-th partition value is typed by the i-th table column.
            Type type = getColumns().get(keyValues.size()).getType();
            // Deal with Hive's special NULL partition key.
            if (partitionKey.equals(nullPartitionKeyValue_)) {
                keyValues.add(NullLiteral.create(type));
            } else {
                try {
                    keyValues.add(LiteralExpr.create(partitionKey, type));
                } catch (Exception ex) {
                    LOG.warn("Failed to create literal expression of type: " + type, ex);
                    throw new CatalogException("Invalid partition key value of type: " + type, ex);
                }
            }
        }
        try {
            Expr.analyze(keyValues, null);
        } catch (AnalysisException e) {
            // should never happen
            throw new IllegalStateException(e);
        }
    }
    try {
        // Each partition could reside on a different filesystem.
        FileSystem fs = partDirPath.getFileSystem(CONF);
        // Remember whether any partition lives on a different filesystem than the table.
        multipleFileSystems_ = multipleFileSystems_
                || !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
        if (fs.exists(partDirPath)) {
            // FileSystem does not have an API that takes in a timestamp and returns a list
            // of files that has been added/changed since. Therefore, we are calling
            // fs.listStatus() to list all the files.
            for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
                String fileName = fileStatus.getPath().getName().toString();
                if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
                        || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
                    // Ignore directory, hidden file starting with . or _, and LZO index files
                    // If a directory is erroneously created as a subdirectory of a partition dir
                    // we should ignore it and move on. Hive will not recurse into directories.
                    // Skip index files, these are read by the LZO scanner directly.
                    continue;
                }

                String partitionDir = fileStatus.getPath().getParent().toString();
                FileDescriptor fd = null;
                // Search for a FileDescriptor with the same partition dir and file name. If one
                // is found, it will be chosen as a candidate to reuse.
                if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
                    for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                        if (oldFileDesc.getFileName().equals(fileName)) {
                            fd = oldFileDesc;
                            break;
                        }
                    }
                }

                // Check if this FileDescriptor has been modified since last loading its block
                // location information. If it has not been changed, the previously loaded
                // value can be reused.
                if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
                        || fd.getModificationTime() != fileStatus.getModificationTime()) {
                    // Create a new file descriptor, the block metadata will be populated by
                    // loadBlockMd.
                    fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
                    addPerFsFileDesc(perFsFileDescMap, fs, partitionDir, fd);
                }

                // Record the descriptor in the table-wide map, creating the per-directory
                // list on first use.
                List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
                if (fds == null) {
                    fds = Lists.newArrayList();
                    fileDescMap_.put(partitionDir, fds);
                }
                fds.add(fd);

                // Add to the list of FileDescriptors for this partition.
                fileDescriptors.add(fd);
            }
            numHdfsFiles_ += fileDescriptors.size();
        }
        HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues, fileFormatDescriptor,
                fileDescriptors, getAvailableAccessLevel(fs, partDirPath));
        partition.checkWellFormed();
        return partition;
    } catch (Exception e) {
        // Any failure in the block above is reported as a CatalogException that keeps
        // the original exception as its cause.
        throw new CatalogException("Failed to create partition: ", e);
    }
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * Walks the entries of a directory through AbstractFileSystem.listStatusIterator
 * and, for each entry, fetches its block locations and reads their names and hosts.
 * An IOException is logged and then makes the final assertion fail.
 */
private static void listStatusIterator(String dirPath) {
    Path dir = new Path(dirPath);
    IOException failure = null;
    try {
        AbstractFileSystem abstractFs =
                AbstractFileSystem.createFileSystem(dir.toUri(), LoadMetadataUtil.getConf());
        for (RemoteIterator<FileStatus> entries = abstractFs.listStatusIterator(dir); entries.hasNext();) {
            FileStatus entry = entries.next();
            BlockLocation[] blocks = abstractFs.getFileBlockLocations(entry.getPath(), 0, entry.getLen());
            for (int i = 0; i < blocks.length; i++) {
                // The results are discarded; only the calls themselves matter here.
                blocks[i].getNames();
                blocks[i].getHosts();
            }
        }
    } catch (IOException e) {
        failure = e;
        LOG.error("Failed to list Status Iterator", e);
    }
    assertFalse(failure != null);
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Entries that are not files, and hidden files, are left in place.
 *
 * @param directory directory whose visible files are removed.
 * @return the number of files that were actually deleted; a file whose delete call
 *     reports failure is logged and not counted.
 * @throws IOException if the file system cannot be accessed.
 * @throws IllegalStateException if {@code directory} is not a directory.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        Path file = fStatus.getPath();
        // Only delete files that are not hidden.
        if (!fStatus.isFile() || isHiddenFile(file.getName())) {
            continue;
        }
        LOG.debug("Removing: " + file);
        // FileSystem.delete() signals an unsuccessful delete through its return value,
        // so only count the files that were really removed.
        if (fs.delete(file, false)) {
            ++numFilesDeleted;
        } else {
            LOG.warn("Failed to remove: " + file);
        }
    }
    return numFilesDeleted;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Counts the visible (non-hidden) files that sit directly in a directory.
 * Entries that are not files are not counted.
 *
 * @param directory directory to inspect; the call fails with an
 *     IllegalStateException if it is not a directory.
 * @return the number of visible files found.
 * @throws IOException if the file system cannot be accessed.
 */
public static int getTotalNumVisibleFiles(Path directory) throws IOException {
    FileSystem dirFs = directory.getFileSystem(CONF);
    Preconditions.checkState(dirFs.getFileStatus(directory).isDirectory());
    int visibleCount = 0;
    for (FileStatus entry : dirFs.listStatus(directory)) {
        // Skip anything that is not a file, as well as hidden files.
        if (!entry.isFile() || isHiddenFile(entry.getPath().getName())) {
            continue;
        }
        visibleCount++;
    }
    return visibleCount;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Moves all visible (non-hidden) files from a source directory to a destination
 * directory. Any sub-directories within the source directory are skipped.
 * If a file of the same name already exists in the destination, the moved file gets
 * a UUID appended to its base name; the same UUID is used for the whole call.
 *
 * @param sourceDir directory to move files out of; must be a directory.
 * @param destDir directory to move files into; must be a directory.
 * @return the number of files moved as part of this operation.
 * @throws IOException if a file system cannot be accessed or a move fails.
 * @throws IllegalStateException if either argument is not a directory.
 */
public static int moveAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    // Resolve each directory against its own file system instead of querying the
    // source through the destination's FileSystem object.
    FileSystem destFs = destDir.getFileSystem(CONF);
    FileSystem sourceFs = sourceDir.getFileSystem(CONF);
    Preconditions.checkState(destFs.isDirectory(destDir));
    Preconditions.checkState(sourceFs.isDirectory(sourceDir));

    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();

    // Enumerate all the files in the source
    int numFilesMoved = 0;
    for (FileStatus fStatus : sourceFs.listStatus(sourceDir)) {
        Path sourceFile = fStatus.getPath();
        if (fStatus.isDirectory()) {
            LOG.debug("Skipping copy of directory: " + sourceFile);
            continue;
        }
        if (isHiddenFile(sourceFile.getName())) {
            continue;
        }

        Path destFile = new Path(destDir, sourceFile.getName());
        if (destFs.exists(destFile)) {
            // Name clash in the destination: disambiguate with the shared UUID.
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.moveFile(sourceFile, destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Lists 'dirPath' with the listStatus API of 'fs' and returns the file descriptors
 * of the entries found there. Each entry is handed to getFileDescriptor(); entries
 * for which that call yields null are skipped. Every returned descriptor is also
 * appended to 'fileDescMap' under the entry's parent directory.
 *
 * Updates of 'fileDescMap' are done while holding its monitor.
 *
 * @return the descriptors collected for 'dirPath', in listing order.
 */
public static List<FileDescriptor> loadFileDescriptors(FileSystem fs, Path dirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> loaded = Lists.newArrayList();

    for (FileStatus entry : fs.listStatus(dirPath)) {
        FileDescriptor descriptor = getFileDescriptor(fs, entry, fileFormat, oldFileDescMap,
                isMarkedCached, perFsFileBlocks, tblName, hostIndex);
        if (descriptor != null) {
            String parentDir = entry.getPath().getParent().toString();
            synchronized (fileDescMap) {
                // Fetch the list for this directory, registering a new one on first use.
                List<FileDescriptor> bucket;
                if (fileDescMap.containsKey(parentDir)) {
                    bucket = fileDescMap.get(parentDir);
                } else {
                    bucket = new ArrayList<FileDescriptor>();
                    fileDescMap.put(parentDir, bucket);
                }
                bucket.add(descriptor);
            }

            // Also part of the result for this directory.
            loaded.add(descriptor);
        }
    }

    return loaded;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListStatusIterator HDFS API to
 * load file status./*ww  w  .  jav  a 2  s . c om*/
 */
public static List<FileDescriptor> loadViaListStatusIterator(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    AbstractFileSystem abstractFs = AbstractFileSystem.createFileSystem(partDirPath.toUri(), CONF);
    RemoteIterator<FileStatus> fileStatusItor = abstractFs.listStatusIterator(partDirPath);

    while (fileStatusItor.hasNext()) {
        FileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Produces the file descriptor for one listed entry, or null when the entry must be
 * skipped: directories, hidden files and LZO index files are not described.
 * A descriptor with the same parent directory and file name found in
 * 'oldFileDescMap' is returned as-is when 'isMarkedCached' is false and its length
 * and modification time equal those of 'fileStatus'. In every other case a new
 * descriptor is built from the file name, length and modification time and handed to
 * loadBlockMetadata() before being returned.
 *
 * The lookup in 'oldFileDescMap' is done while holding that map's monitor.
 */
private static FileDescriptor getFileDescriptor(FileSystem fs, FileStatus fileStatus, HdfsFileFormat fileFormat,
        Map<String, List<FileDescriptor>> oldFileDescMap, boolean isMarkedCached,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName, ListMap<TNetworkAddress> hostIndex) {
    final Path filePath = fileStatus.getPath();
    final String name = filePath.getName();

    // A directory under a partition dir is ignored; Hive will not recurse into it.
    if (fileStatus.isDirectory()) {
        return null;
    }
    // Hidden files are ignored.
    if (FileSystemUtil.isHiddenFile(name)) {
        return null;
    }
    // Index files are read by the LZO scanner directly.
    if (HdfsCompression.fromFileName(name) == HdfsCompression.LZO_INDEX) {
        return null;
    }

    // Look for a previously loaded descriptor of the same file as a reuse candidate.
    final String parentDir = filePath.getParent().toString();
    FileDescriptor candidate = null;
    if (oldFileDescMap != null) {
        synchronized (oldFileDescMap) {
            List<FileDescriptor> previous = oldFileDescMap.get(parentDir);
            if (previous != null) {
                // TODO: This doesn't seem like the right data structure if a directory has
                // a lot of files.
                for (FileDescriptor old : previous) {
                    if (old.getFileName().equals(name)) {
                        candidate = old;
                        break;
                    }
                }
            }
        }
    }

    // The candidate can only be reused if the partition is not marked as cached and
    // the file has neither changed size nor modification time since it was loaded.
    boolean reusable = candidate != null && !isMarkedCached
            && candidate.getFileLength() == fileStatus.getLen()
            && candidate.getModificationTime() == fileStatus.getModificationTime();
    if (reusable) {
        return candidate;
    }

    // New or changed file: describe it from scratch and load its block metadata.
    FileDescriptor fresh = new FileDescriptor(name, fileStatus.getLen(), fileStatus.getModificationTime());
    loadBlockMetadata(fs, fileStatus, fresh, fileFormat, perFsFileBlocks, tblName, hostIndex);
    return fresh;
}