Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileStatus.getPath from open-source projects.

Prototype

public Path getPath() 
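
A minimal sketch (not taken from any of the projects below) of the typical pattern: list a directory and read each entry's fully-qualified Path. The directory name here is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        Path dir = new Path("/tmp/example"); // hypothetical directory
        FileSystem fs = dir.getFileSystem(new Configuration());
        for (FileStatus status : fs.listStatus(dir)) {
            // getPath() returns the fully-qualified Path of the entry,
            // e.g. hdfs://namenode:8020/tmp/example/part-00000
            System.out.println(status.getPath());
        }
    }
}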

Usage

From source file:com.cloudera.hoop.fs.FSUtils.java

License:Open Source License

/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON array
 * object. It replaces the <code>SCHEME://HOST:PORT</code> of the path
 * with the specified URL.
 * <p/>
 * @param status Hadoop file status.
 * @param hoopBaseUrl base URL to replace the
 * <code>SCHEME://HOST:PORT</code> in the file status.
 * @return The JSON representation of the file status.
 */
@SuppressWarnings("unchecked")
public static Map fileStatusToJSON(FileStatus status, String hoopBaseUrl) {
    Map json = new LinkedHashMap();
    json.put("path", convertPathToHoop(status.getPath(), hoopBaseUrl).toString());
    json.put("isDir", status.isDir());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("replication", status.getReplication());
    return json;
}
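
A hypothetical driver for the snippet above, assuming the surrounding FSUtils class is on the classpath; the directory and Hoop base URL are made up for illustration.

FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus status : fs.listStatus(new Path("/user/demo"))) {
    Map json = FSUtils.fileStatusToJSON(status, "http://hoop-host:14000");
    System.out.println(json);
}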

From source file:com.cloudera.impala.analysis.LoadDataStmt.java

License:Apache License

private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have all permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);

    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        // sourceDataPath_.analyze() ensured that path is on an HDFS filesystem.
        Preconditions.checkState(fs instanceof DistributedFileSystem);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        if (!dfs.exists(source)) {
            throw new AnalysisException(String.format("INPATH location '%s' does not exist.", sourceDataPath_));
        }

        if (dfs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            }
            if (FileSystemUtil.containsSubdirectory(source)) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
            }
        } else { // INPATH points to a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' points to a hidden file.", source));
            }
        }

        String noWriteAccessErrorMsg = String.format(
                "Unable to LOAD DATA into "
                        + "target table (%s) because Impala does not have WRITE access to HDFS " + "location: ",
                hdfsTable.getFullName());

        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + partition.getLocation());
            }
        } else {
            // "default" partition
            partition = hdfsTable.getPartitions().get(0);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
            }
        }
        Preconditions.checkNotNull(partition);

        // Until Frontend.loadTableData() can handle cross-filesystem and filesystems
        // that aren't HDFS, require that source and dest are on the same HDFS.
        if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
            throw new AnalysisException(String.format(
                    "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                            + "destination %s (%s) are on different file-systems.",
                    hdfsTable.getFullName(), source, partitionSpec_ == null ? "table" : "partition",
                    partition.getLocation()));
        }
        // Verify the files being loaded are supported.
        for (FileStatus fStatus : fs.listStatus(source)) {
            if (fs.isDirectory(fStatus.getPath()))
                continue;
            StringBuilder errorMsg = new StringBuilder();
            HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
            }
        }
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Creates a new HdfsPartition object to be added to the internal partition list.
 * Populates with file format information and file locations. Partitions may be empty,
 * or may not even exist on the file system (a partition's location may have been
 * changed to a new path that is about to be created by an INSERT). For unchanged
 * files (indicated by unchanged mtime), reuses the FileDescriptor from the
 * oldFileDescMap. The one exception is if the partition is marked as cached
 * in which case the block metadata cannot be reused. Otherwise, creates a new
 * FileDescriptor for each modified or new file and adds it to newFileDescMap.
 * Both old and newFileDescMap are Maps of parent directory (partition location)
 * to list of files (FileDescriptors) under that directory.
 * Returns new partition if successful or null if none was added.
 * Separated from addPartition to reduce the number of operations done
 * while holding the lock on the hdfs table.
 *
 *  @throws CatalogException
 *    if the supplied storage descriptor contains metadata that Impala can't
 *    understand.
 */
private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
        org.apache.hadoop.hive.metastore.api.Partition msPartition,
        Map<String, List<FileDescriptor>> oldFileDescMap,
        Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescMap) throws CatalogException {
    HdfsStorageDescriptor fileFormatDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_,
            storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    // If the partition is marked as cached, the block location metadata must be
    // reloaded, even if the file times have not changed.
    boolean isMarkedCached = isMarkedCached_;
    List<LiteralExpr> keyValues = Lists.newArrayList();
    if (msPartition != null) {
        isMarkedCached = HdfsCachingUtil.getCacheDirIdFromParams(msPartition.getParameters()) != null;
        // Load key values
        for (String partitionKey : msPartition.getValues()) {
            Type type = getColumns().get(keyValues.size()).getType();
            // Deal with Hive's special NULL partition key.
            if (partitionKey.equals(nullPartitionKeyValue_)) {
                keyValues.add(NullLiteral.create(type));
            } else {
                try {
                    keyValues.add(LiteralExpr.create(partitionKey, type));
                } catch (Exception ex) {
                    LOG.warn("Failed to create literal expression of type: " + type, ex);
                    throw new CatalogException("Invalid partition key value of type: " + type, ex);
                }
            }
        }
        try {
            Expr.analyze(keyValues, null);
        } catch (AnalysisException e) {
            // should never happen
            throw new IllegalStateException(e);
        }
    }
    try {
        // Each partition could reside on a different filesystem.
        FileSystem fs = partDirPath.getFileSystem(CONF);
        multipleFileSystems_ = multipleFileSystems_
                || !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
        if (fs.exists(partDirPath)) {
            // FileSystem does not have an API that takes a timestamp and returns the
            // files added or changed since then, so we call fs.listStatus() to list
            // all the files.
            for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
                String fileName = fileStatus.getPath().getName();
                if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
                        || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
                    // Ignore directory, hidden file starting with . or _, and LZO index files
                    // If a directory is erroneously created as a subdirectory of a partition dir
                    // we should ignore it and move on. Hive will not recurse into directories.
                    // Skip index files, these are read by the LZO scanner directly.
                    continue;
                }

                String partitionDir = fileStatus.getPath().getParent().toString();
                FileDescriptor fd = null;
                // Search for a FileDescriptor with the same partition dir and file name. If one
                // is found, it will be chosen as a candidate to reuse.
                if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
                    for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                        if (oldFileDesc.getFileName().equals(fileName)) {
                            fd = oldFileDesc;
                            break;
                        }
                    }
                }

                // Check if this FileDescriptor has been modified since last loading its block
                // location information. If it has not been changed, the previously loaded
                // value can be reused.
                if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
                        || fd.getModificationTime() != fileStatus.getModificationTime()) {
                    // Create a new file descriptor, the block metadata will be populated by
                    // loadBlockMd.
                    fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
                    addPerFsFileDesc(perFsFileDescMap, fs, partitionDir, fd);
                }

                List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
                if (fds == null) {
                    fds = Lists.newArrayList();
                    fileDescMap_.put(partitionDir, fds);
                }
                fds.add(fd);

                // Add to the list of FileDescriptors for this partition.
                fileDescriptors.add(fd);
            }
            numHdfsFiles_ += fileDescriptors.size();
        }
        HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues, fileFormatDescriptor,
                fileDescriptors, getAvailableAccessLevel(fs, partDirPath));
        partition.checkWellFormed();
        return partition;
    } catch (Exception e) {
        throw new CatalogException("Failed to create partition: ", e);
    }
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * List file status by calling abstractFileSystem.listStatusIterator.
 */
private static void listStatusIterator(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        AbstractFileSystem fs = AbstractFileSystem.createFileSystem(path.toUri(), LoadMetadataUtil.getConf());
        RemoteIterator<FileStatus> iter = fs.listStatusIterator(path);
        while (iter.hasNext()) {
            FileStatus fileStatus = iter.next();
            BlockLocation[] locations = fs.getFileBlockLocations(fileStatus.getPath(), 0, fileStatus.getLen());
            for (BlockLocation loc : locations) {
                loc.getNames();
                loc.getHosts();
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Status Iterator", e);
    }
    assertFalse(exceptionThrown);
}
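
For comparison, a sketch of the same listing through the user-facing FileSystem API rather than AbstractFileSystem: FileSystem.listFiles() returns LocatedFileStatus entries that already carry block locations, so the separate getFileBlockLocations() call can be dropped. Variable names mirror the test above.

FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false /* non-recursive */);
while (iter.hasNext()) {
    LocatedFileStatus fileStatus = iter.next();
    // Block locations were fetched as part of the listing call.
    for (BlockLocation loc : fileStatus.getBlockLocations()) {
        loc.getNames();
        loc.getHosts();
    }
}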

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Returns the number of files deleted as part of this operation.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only delete files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            LOG.debug("Removing: " + fStatus.getPath());
            fs.delete(fStatus.getPath(), false);
            ++numFilesDeleted;
        }
    }
    return numFilesDeleted;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Returns the total number of visible (non-hidden) files in a directory.
 */
public static int getTotalNumVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFiles = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only count files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            ++numFiles;
        }
    }
    return numFiles;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Moves all visible (non-hidden) files from a source directory to a destination
 * directory. Any sub-directories within the source directory are skipped.
 * Returns the number of files moved as part of this operation.
 */
public static int moveAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    FileSystem fs = destDir.getFileSystem(CONF);
    Preconditions.checkState(fs.isDirectory(destDir));
    Preconditions.checkState(fs.isDirectory(sourceDir));

    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();

    // Enumerate all the files in the source
    int numFilesMoved = 0;
    for (FileStatus fStatus : fs.listStatus(sourceDir)) {
        if (fStatus.isDirectory()) {
            LOG.debug("Skipping copy of directory: " + fStatus.getPath());
            continue;
        } else if (isHiddenFile(fStatus.getPath().getName())) {
            continue;
        }

        Path destFile = new Path(destDir, fStatus.getPath().getName());
        if (fs.exists(destFile)) {
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.moveFile(fStatus.getPath(), destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}
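
The appendToBaseFileName() helper used above is not shown in this excerpt. A plausible sketch (an assumption, not the project's actual implementation) that splices the suffix in before the first extension, so dependent names such as foo.lzo and foo.lzo_index stay consistent after renaming:

private static String appendToBaseFileName(String baseFileName, String appendStr) {
    // foo.lzo -> foo_<uuid>.lzo ; a name with no extension -> name_<uuid>
    int dotIdx = baseFileName.indexOf('.');
    if (dotIdx == -1) {
        return baseFileName + "_" + appendStr;
    }
    return baseFileName.substring(0, dotIdx) + "_" + appendStr + baseFileName.substring(dotIdx);
}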

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Loads and returns a list of file descriptors for the files in 'dirPath', using the
 * listStatus HDFS API to fetch file statuses. Skips directories, hidden files (names
 * starting with . or _), and LZO index files. If a file is present in the old file
 * descriptor map, has not been modified, and the partition is not marked as cached
 * ('isMarkedCached'), the cached descriptor is reused. Otherwise a new descriptor is
 * created from the file name, file length, and modification time.
 *
 * Must be threadsafe. Access to 'oldFileDescMap', 'perFsFileBlocks', 'hostIndex' and
 * 'fileDescMap' must be protected.
 */
public static List<FileDescriptor> loadFileDescriptors(FileSystem fs, Path dirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    for (FileStatus fileStatus : fs.listStatus(dirPath)) {
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        synchronized (fileDescMap) {
            if (!fileDescMap.containsKey(partitionDir)) {
                fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
            }
            fileDescMap.get(partitionDir).add(fd);
        }

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListStatusIterator HDFS API to
 * load file status.
 */
 */
public static List<FileDescriptor> loadViaListStatusIterator(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    AbstractFileSystem abstractFs = AbstractFileSystem.createFileSystem(partDirPath.toUri(), CONF);
    RemoteIterator<FileStatus> fileStatusItor = abstractFs.listStatusIterator(partDirPath);

    while (fileStatusItor.hasNext()) {
        FileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Gets a file descriptor for the given fileStatus, consulting oldFileDescMap. Returns
 * null if the file is a directory, a hidden file (name starting with . or _), or an
 * LZO index file. If the file is present in the old file descriptor map, has not been
 * modified, and the partition is not marked as cached ('isMarkedCached'), the cached
 * descriptor is reused. Otherwise a new descriptor is created from the file name, file
 * length, and modification time.
 *
 * Must be thread safe. Access to 'oldFileDescMap', 'perFsFileBlocks' and 'hostIndex'
 * must be protected.
 */
private static FileDescriptor getFileDescriptor(FileSystem fs, FileStatus fileStatus, HdfsFileFormat fileFormat,
        Map<String, List<FileDescriptor>> oldFileDescMap, boolean isMarkedCached,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName, ListMap<TNetworkAddress> hostIndex) {
    String fileName = fileStatus.getPath().getName();

    if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
            || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
        // Ignore directory, hidden file starting with . or _, and LZO index files
        // If a directory is erroneously created as a subdirectory of a partition dir
        // we should ignore it and move on. Hive will not recurse into directories.
        // Skip index files, these are read by the LZO scanner directly.
        return null;
    }

    String partitionDir = fileStatus.getPath().getParent().toString();
    FileDescriptor fd = null;
    // Search for a FileDescriptor with the same partition dir and file name. If one
    // is found, it will be chosen as a candidate to reuse.
    if (oldFileDescMap != null) {
        synchronized (oldFileDescMap) {
            if (oldFileDescMap.get(partitionDir) != null) {
                for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                    // TODO: This doesn't seem like the right data structure if a directory has
                    // a lot of files.
                    if (oldFileDesc.getFileName().equals(fileName)) {
                        fd = oldFileDesc;
                        break;
                    }
                }
            }
        }
    }

    // Check if this FileDescriptor has been modified since last loading its block
    // location information. If it has not been changed, the previously loaded value can
    // be reused.
    if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
            || fd.getModificationTime() != fileStatus.getModificationTime()) {
        // Create a new file descriptor and load the file block metadata,
        // collecting the block metadata into perFsFileBlocks.  The disk IDs for
        // all the blocks of each filesystem will be loaded by loadDiskIds().
        fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
        loadBlockMetadata(fs, fileStatus, fd, fileFormat, perFsFileBlocks, tblName, hostIndex);
    }

    return fd;
}
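
The TODO above notes that a linear scan over a directory's old descriptors is a poor fit for directories with many files. A sketch of the O(1) lookup it hints at, assuming the FileDescriptor type used here; the per-directory index is an assumption, not part of the original code.

// Build a name -> descriptor index once per directory, then probe it per file.
Map<String, FileDescriptor> byName = new HashMap<>();
for (FileDescriptor oldFd : oldFileDescMap.get(partitionDir)) {
    byName.put(oldFd.getFileName(), oldFd);
}
FileDescriptor fd = byName.get(fileName); // null means no candidate to reuse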